diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3559,18 +3559,29 @@
     if ((LoC >> 31) == HiC)
       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

-    // If vl is equal to VLMAX and Hi constant is equal to Lo, we could use
-    // vmv.v.x whose EEW = 32 to lower it.
-    if (LoC == HiC && (isAllOnesConstant(VL) ||
-                       (isa<RegisterSDNode>(VL) &&
-                        cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))) {
-      MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
-      // TODO: if vl <= min(VLMAX), we can also do this. But we could not
-      // access the subtarget here now.
-      auto InterVec = DAG.getNode(
-          RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
-          DAG.getRegister(RISCV::X0, MVT::i32));
-      return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+    // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to
+    // Lo, we could use vmv.v.x whose EEW = 32 to lower it. This allows us to
+    // use vlmax vsetvli or vsetivli to change the VL.
+    // FIXME: Support larger constants?
+    // FIXME: Support non-constant VLs by saturating?
+    if (LoC == HiC) {
+      SDValue NewVL;
+      if (isAllOnesConstant(VL) ||
+          (isa<RegisterSDNode>(VL) &&
+           cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
+        NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
+      else if (isa<ConstantSDNode>(VL) &&
+               isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
+        NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
+
+      if (NewVL) {
+        MVT InterVT =
+            MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+        auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
+                                    DAG.getUNDEF(InterVT), Lo, NewVL);
+        return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+      }
     }
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -1385,22 +1385,10 @@
 define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bitreverse_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw zero, 4(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
 ; RV32-NEXT:    lui a1, 1044480
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
@@ -1414,7 +1402,7 @@
 ; RV32-NEXT:    lui a4, 4080
 ; RV32-NEXT:    vand.vx v10, v8, a4, v0.t
 ; RV32-NEXT:    vsll.vi v10, v10, 24, v0.t
-; RV32-NEXT:    mv a5, sp
+; RV32-NEXT:    addi a5, sp, 8
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vlse64.v v11, (a5), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
@@ -1434,33 +1422,36 @@
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vor.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vlse64.v v10, (a1), zero
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; 
RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v2i64: @@ -1529,22 +1520,10 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_v2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1558,7 +1537,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v8, a4 ; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1578,33 +1557,36 @@ ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 
2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v2i64_unmasked: @@ -1677,22 +1659,10 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -1706,7 +1676,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v8, a4, v0.t ; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -1726,33 +1696,36 @@ ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v4i64: @@ -1821,22 +1794,10 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_v4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: 
.cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -1850,7 +1811,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v8, a4 ; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -1870,33 +1831,36 @@ ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v4i64_unmasked: @@ -1969,22 +1933,10 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -1998,7 +1950,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v8, a4, v0.t ; RV32-NEXT: vsll.vi v20, v12, 24, v0.t -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: 
vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2018,33 +1970,36 @@ ; RV32-NEXT: vor.vv v8, v8, v20, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v8i64: @@ -2113,22 +2068,10 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_v8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2142,7 +2085,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4 ; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2162,33 +2105,36 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: 
addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v8i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -181,24 +181,12 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) { ; RV32-LABEL: bitreverse_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v9, v8, a1 @@ -209,7 +197,7 @@ ; RV32-NEXT: vand.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: mv a4, sp +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vlse64.v v11, (a4), zero ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v10, a4 @@ -226,31 +214,40 @@ ; RV32-NEXT: vand.vv v8, v8, v11 ; RV32-NEXT: vsll.vi v8, v8, 8 ; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vlse64.v v11, (a1), zero ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vand.vv v9, v9, v11 -; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 
1 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vse64.v v8, (a0) -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_v2i64: @@ -741,24 +738,12 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) { ; LMULMAX2-RV32-LABEL: bitreverse_v4i64: ; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: addi sp, sp, -32 -; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 32 +; LMULMAX2-RV32-NEXT: addi sp, sp, -16 +; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 16 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: sw zero, 4(sp) +; LMULMAX2-RV32-NEXT: sw zero, 12(sp) ; LMULMAX2-RV32-NEXT: lui a1, 1044480 -; LMULMAX2-RV32-NEXT: sw a1, 0(sp) -; LMULMAX2-RV32-NEXT: lui a1, 61681 -; LMULMAX2-RV32-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32-NEXT: sw a1, 28(sp) -; LMULMAX2-RV32-NEXT: sw a1, 24(sp) -; LMULMAX2-RV32-NEXT: lui a1, 209715 -; LMULMAX2-RV32-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32-NEXT: sw a1, 20(sp) -; LMULMAX2-RV32-NEXT: sw a1, 16(sp) -; LMULMAX2-RV32-NEXT: lui a1, 349525 -; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32-NEXT: sw a1, 12(sp) ; LMULMAX2-RV32-NEXT: sw a1, 8(sp) ; LMULMAX2-RV32-NEXT: li a1, 56 ; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1 @@ -769,7 +754,7 @@ ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: mv a4, sp +; LMULMAX2-RV32-NEXT: addi a4, sp, 8 ; LMULMAX2-RV32-NEXT: vlse64.v v14, (a4), zero ; LMULMAX2-RV32-NEXT: lui a4, 4080 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 @@ -786,31 +771,40 @@ ; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 -; LMULMAX2-RV32-NEXT: addi a1, sp, 24 -; LMULMAX2-RV32-NEXT: vlse64.v v14, (a1), zero ; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v14 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: addi a1, sp, 16 -; LMULMAX2-RV32-NEXT: vlse64.v v12, (a1), zero +; LMULMAX2-RV32-NEXT: lui a1, 61681 +; LMULMAX2-RV32-NEXT: addi a1, a1, -241 +; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 +; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 4 ; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 2 +; LMULMAX2-RV32-NEXT: lui a1, 209715 +; LMULMAX2-RV32-NEXT: addi a1, a1, 819 +; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 ; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: addi a1, sp, 8 -; LMULMAX2-RV32-NEXT: vlse64.v v12, (a1), zero ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 2 ; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 +; LMULMAX2-RV32-NEXT: lui a1, 349525 +; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 +; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v12, 
a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 ; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32-NEXT: addi sp, sp, 32 +; LMULMAX2-RV32-NEXT: addi sp, sp, 16 ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX2-RV64-LABEL: bitreverse_v4i64: @@ -877,26 +871,14 @@ ; ; LMULMAX1-RV32-LABEL: bitreverse_v4i64: ; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: addi sp, sp, -32 -; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32 +; LMULMAX1-RV32-NEXT: addi sp, sp, -16 +; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 16 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: sw zero, 4(sp) +; LMULMAX1-RV32-NEXT: sw zero, 12(sp) ; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; LMULMAX1-RV32-NEXT: sw a2, 0(sp) -; LMULMAX1-RV32-NEXT: lui a2, 61681 -; LMULMAX1-RV32-NEXT: addi a2, a2, -241 -; LMULMAX1-RV32-NEXT: sw a2, 28(sp) -; LMULMAX1-RV32-NEXT: sw a2, 24(sp) -; LMULMAX1-RV32-NEXT: lui a2, 209715 -; LMULMAX1-RV32-NEXT: addi a2, a2, 819 -; LMULMAX1-RV32-NEXT: sw a2, 20(sp) -; LMULMAX1-RV32-NEXT: sw a2, 16(sp) -; LMULMAX1-RV32-NEXT: lui a2, 349525 -; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 -; LMULMAX1-RV32-NEXT: sw a2, 12(sp) ; LMULMAX1-RV32-NEXT: sw a2, 8(sp) ; LMULMAX1-RV32-NEXT: li a2, 56 ; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2 @@ -907,7 +889,7 @@ ; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: mv a5, sp +; LMULMAX1-RV32-NEXT: addi a5, sp, 8 ; LMULMAX1-RV32-NEXT: vlse64.v v12, (a5), zero ; LMULMAX1-RV32-NEXT: lui a5, 4080 ; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5 @@ -924,25 +906,34 @@ ; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 ; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a3 ; LMULMAX1-RV32-NEXT: vor.vv v9, v13, v9 -; LMULMAX1-RV32-NEXT: addi a6, sp, 24 -; LMULMAX1-RV32-NEXT: vlse64.v v13, (a6), zero ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v13 -; LMULMAX1-RV32-NEXT: addi a6, sp, 16 -; LMULMAX1-RV32-NEXT: vlse64.v v11, (a6), zero +; LMULMAX1-RV32-NEXT: lui a6, 61681 +; LMULMAX1-RV32-NEXT: addi a6, a6, -241 +; LMULMAX1-RV32-NEXT: vsetvli a7, zero, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.x v11, a6 +; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 4 ; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: addi a6, sp, 8 -; LMULMAX1-RV32-NEXT: vlse64.v v14, (a6), zero +; LMULMAX1-RV32-NEXT: lui a6, 209715 +; LMULMAX1-RV32-NEXT: addi a6, a6, 819 +; LMULMAX1-RV32-NEXT: vsetvli a7, zero, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.x v13, a6 +; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v13 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 +; LMULMAX1-RV32-NEXT: lui a6, 349525 +; LMULMAX1-RV32-NEXT: addi a6, a6, 1365 +; 
LMULMAX1-RV32-NEXT: vsetvli a7, zero, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.x v14, a6 +; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v14 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v14 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v9 @@ -969,13 +960,13 @@ ; LMULMAX1-RV32-NEXT: vor.vv v8, v15, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v13 +; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v13 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 @@ -985,7 +976,7 @@ ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV32-NEXT: addi sp, sp, 32 +; LMULMAX1-RV32-NEXT: addi sp, sp, 16 ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: bitreverse_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1449,24 +1449,6 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v9, v0.t @@ -1483,15 +1465,17 @@ ; RV32-NEXT: vor.vv v8, v8, v9, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v10, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -1499,19 +1483,20 @@ ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: 
vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v2i64: @@ -1568,24 +1553,6 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -1602,15 +1569,17 @@ ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v10, v8, v9 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -1618,19 +1587,20 @@ ; RV32-NEXT: vadd.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v2i64_unmasked: @@ -1691,24 +1661,6 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi 
a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t @@ -1725,15 +1677,17 @@ ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v12, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -1741,19 +1695,20 @@ ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v4i64: @@ -1810,24 +1765,6 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v10 @@ -1844,15 +1781,17 @@ ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsub.vv v8, v8, v10 -; 
RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -1860,19 +1799,20 @@ ; RV32-NEXT: vadd.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v4i64_unmasked: @@ -1933,24 +1873,6 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t @@ -1967,15 +1889,17 @@ ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v16, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -1983,19 +1907,20 @@ ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: 
vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v8i64: @@ -2052,24 +1977,6 @@ define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v12 @@ -2086,15 +1993,17 @@ ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -2102,19 +2011,20 @@ ; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v8i64_unmasked: @@ -4690,24 +4600,6 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; 
RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v9, v0.t @@ -4724,15 +4616,17 @@ ; RV32-NEXT: vor.vv v8, v8, v9, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v10, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -4740,19 +4634,20 @@ ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v2i64: @@ -4809,24 +4704,6 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -4843,15 +4720,17 @@ ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; 
RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v10, v8, v9 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -4859,19 +4738,20 @@ ; RV32-NEXT: vadd.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v2i64_unmasked: @@ -4930,24 +4810,6 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t @@ -4964,15 +4826,17 @@ ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v12, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -4980,19 +4844,20 @@ ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, 
a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v4i64: @@ -5049,24 +4914,6 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v10 @@ -5083,15 +4930,17 @@ ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -5099,19 +4948,20 @@ ; RV32-NEXT: vadd.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v4i64_unmasked: @@ -5170,24 +5020,6 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: 
vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
 ; RV32-NEXT: vor.vv v8, v8, v12, v0.t
@@ -5204,15 +5036,17 @@
 ; RV32-NEXT: vor.vv v8, v8, v12, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -5220,19 +5054,20 @@
 ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v8i64:
@@ -5289,24 +5124,6 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsrl.vi v12, v8, 1
 ; RV32-NEXT: vor.vv v8, v8, v12
@@ -5323,15 +5140,17 @@
 ; RV32-NEXT: vor.vv v8, v8, v12
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v12, v12, v16
 ; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v16, v8, v12
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -5339,19 +5158,20 @@
 ; RV32-NEXT: vadd.vv v8, v16, v8
 ; RV32-NEXT: vsrl.vi v12, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v12
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -460,25 +460,8 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: ctlz_v2i64:
 ; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: lui a1, 349525
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 209715
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 61681
-; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 4112
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2
@@ -493,29 +476,40 @@
 ; LMULMAX2-RV32I-NEXT: vsrl.vx v9, v8, a1
 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v11, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10
 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v11
 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT: li a1, 56
 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT: ret
 ;
 ; LMULMAX2-RV64I-LABEL: ctlz_v2i64:
@@ -1160,25 +1154,8 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: ctlz_v4i64:
 ; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: lui a1, 349525
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 209715
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 61681
-; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 4112
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2
@@ -1193,29 +1170,40 @@
 ; LMULMAX2-RV32I-NEXT: vsrl.vx v10, v8, a1
 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v14, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12
 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v12
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v14
 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT: li a1, 56
 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT: ret
 ;
 ; LMULMAX2-RV64I-LABEL: ctlz_v4i64:
@@ -1788,25 +1776,8 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v2i64:
 ; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: lui a1, 349525
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 209715
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 61681
-; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 4112
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 2
@@ -1821,29 +1792,40 @@
 ; LMULMAX2-RV32I-NEXT: vsrl.vx v9, v8, a1
 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v11, v9
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10
 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vand.vv v9, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v11
 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9
 ; LMULMAX2-RV32I-NEXT: li a1, 56
 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT: ret
 ;
 ; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v2i64:
@@ -2458,25 +2440,8 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; LMULMAX2-RV32I-LABEL: ctlz_zero_undef_v4i64:
 ; LMULMAX2-RV32I: # %bb.0:
-; LMULMAX2-RV32I-NEXT: addi sp, sp, -32
 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: lui a1, 349525
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32I-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 24(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 209715
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32I-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 16(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 61681
-; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32I-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 8(sp)
-; LMULMAX2-RV32I-NEXT: lui a1, 4112
-; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32I-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32I-NEXT: sw a1, 0(sp)
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 2
@@ -2491,29 +2456,40 @@
 ; LMULMAX2-RV32I-NEXT: vsrl.vx v10, v8, a1
 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT: vnot.v v8, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 1
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v14, v10
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32I-NEXT: lui a1, 349525
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12
 ; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v12
+; LMULMAX2-RV32I-NEXT: lui a1, 209715
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 819
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10
 ; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v12
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32I-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32I-NEXT: mv a1, sp
-; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 4
-; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v14
 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v12
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8
+; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 61681
+; LMULMAX2-RV32I-NEXT: addi a1, a1, -241
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32I-NEXT: lui a1, 4112
+; LMULMAX2-RV32I-NEXT: addi a1, a1, 257
+; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10
 ; LMULMAX2-RV32I-NEXT: li a1, 56
 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32I-NEXT: addi sp, sp, 32
 ; LMULMAX2-RV32I-NEXT: ret
 ;
 ; LMULMAX2-RV64I-LABEL: ctlz_zero_undef_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -1073,35 +1073,19 @@ define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v2i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1109,19 +1093,20 @@
 ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v2i64:
@@ -1164,35 +1149,19 @@ define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v2i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10
 ; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v10, v8, v9
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1200,19 +1169,20 @@
 ; RV32-NEXT: vadd.vv v8, v10, v8
 ; RV32-NEXT: vsrl.vi v9, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v9
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v2i64_unmasked:
@@ -1259,35 +1229,19 @@ define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v4i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1295,19 +1249,20 @@
 ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v4i64:
@@ -1350,35 +1305,19 @@ define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v4i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v10, v10, v12
 ; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v12, v8, v10
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1386,19 +1325,20 @@
 ; RV32-NEXT: vadd.vv v8, v12, v8
 ; RV32-NEXT: vsrl.vi v10, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v10
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v4i64_unmasked:
@@ -1445,35 +1385,19 @@ define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1481,19 +1405,20 @@
 ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v8i64:
@@ -1536,35 +1461,19 @@ define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v8i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v12, v12, v16
 ; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v16, v8, v12
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1572,19 +1481,20 @@
 ; RV32-NEXT: vadd.vv v8, v16, v8
 ; RV32-NEXT: vsrl.vi v12, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v12
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v8i64_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -288,49 +288,42 @@ define void @ctpop_v2i64(ptr %x, ptr %y) {
 ; LMULMAX2-RV32-LABEL: ctpop_v2i64:
 ; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: addi sp, sp, -32
-; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 32
 ; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
 ; LMULMAX2-RV32-NEXT: lui a1, 349525
 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 24(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX2-RV32-NEXT: vand.vv v9, v10, v9
+; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9
 ; LMULMAX2-RV32-NEXT: lui a1, 209715
 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 16(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v9
+; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
+; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
+; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v9
 ; LMULMAX2-RV32-NEXT: lui a1, 61681
 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
 ; LMULMAX2-RV32-NEXT: lui a1, 4112
 ; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 0(sp)
-; LMULMAX2-RV32-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX2-RV32-NEXT: vand.vv v9, v11, v9
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: vand.vv v9, v8, v10
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX2-RV32-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX2-RV32-NEXT: mv a1, sp
-; LMULMAX2-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v11
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v10
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v9
 ; LMULMAX2-RV32-NEXT: li a1, 56
 ; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: addi sp, sp, 32
 ; LMULMAX2-RV32-NEXT: ret
 ;
 ; LMULMAX2-RV64-LABEL: ctpop_v2i64:
@@ -371,49 +364,42 @@
 ;
 ; LMULMAX1-RV32-LABEL: ctpop_v2i64:
 ; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: addi sp, sp, -32
-; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT: lui a1, 349525
 ; LMULMAX1-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX1-RV32-NEXT: sw a1, 28(sp)
-; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
+; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1
+; LMULMAX1-RV32-NEXT: vand.vv v9, v10, v9
+; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
 ; LMULMAX1-RV32-NEXT: lui a1, 209715
 ; LMULMAX1-RV32-NEXT: addi a1, a1, 819
-; LMULMAX1-RV32-NEXT: sw a1, 20(sp)
-; LMULMAX1-RV32-NEXT: sw a1, 16(sp)
+; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v9
+; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
+; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8
+; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4
+; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v9
 ; LMULMAX1-RV32-NEXT: lui a1, 61681
 ; LMULMAX1-RV32-NEXT: addi a1, a1, -241
-; LMULMAX1-RV32-NEXT: sw a1, 12(sp)
-; LMULMAX1-RV32-NEXT: sw a1, 8(sp)
+; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
 ; LMULMAX1-RV32-NEXT: lui a1, 4112
 ; LMULMAX1-RV32-NEXT: addi a1, a1, 257
-; LMULMAX1-RV32-NEXT: sw a1, 4(sp)
-; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
-; LMULMAX1-RV32-NEXT: addi a1, sp, 24
-; LMULMAX1-RV32-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX1-RV32-NEXT: addi a1, sp, 16
-; LMULMAX1-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 1
-; LMULMAX1-RV32-NEXT: vand.vv v9, v11, v9
-; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vand.vv v9, v8, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV32-NEXT: addi a1, sp, 8
-; LMULMAX1-RV32-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX1-RV32-NEXT: mv a1, sp
-; LMULMAX1-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v10
+; LMULMAX1-RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v9
 ; LMULMAX1-RV32-NEXT: li a1, 56
 ; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a1
 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT: addi sp, sp, 32
 ; LMULMAX1-RV32-NEXT: ret
 ;
 ; LMULMAX1-RV64-LABEL: ctpop_v2i64:
@@ -843,49 +829,42 @@ define void @ctpop_v4i64(ptr %x, ptr %y) {
 ; LMULMAX2-RV32-LABEL: ctpop_v4i64:
 ; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: addi sp, sp, -32
-; LMULMAX2-RV32-NEXT: .cfi_def_cfa_offset 32
 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
 ; LMULMAX2-RV32-NEXT: lui a1, 349525
 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365
-; LMULMAX2-RV32-NEXT: sw a1, 28(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 24(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 1
+; LMULMAX2-RV32-NEXT: vand.vv v10, v12, v10
+; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
 ; LMULMAX2-RV32-NEXT: lui a1, 209715
 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819
-; LMULMAX2-RV32-NEXT: sw a1, 20(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 16(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vand.vv v12, v8, v10
+; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
+; LMULMAX2-RV32-NEXT: vadd.vv v8, v12, v8
+; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4
+; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
 ; LMULMAX2-RV32-NEXT: lui a1, 61681
 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241
-; LMULMAX2-RV32-NEXT: sw a1, 12(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 8(sp)
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
 ; LMULMAX2-RV32-NEXT: lui a1, 4112
 ; LMULMAX2-RV32-NEXT: addi a1, a1, 257
-; LMULMAX2-RV32-NEXT: sw a1, 4(sp)
-; LMULMAX2-RV32-NEXT: sw a1, 0(sp)
-; LMULMAX2-RV32-NEXT: addi a1, sp, 24
-; LMULMAX2-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT: addi a1, sp, 16
-; LMULMAX2-RV32-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 1
-; LMULMAX2-RV32-NEXT: vand.vv v10, v14, v10
-; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v12
-; LMULMAX2-RV32-NEXT: vsrl.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v12
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
-; LMULMAX2-RV32-NEXT: addi a1, sp, 8
-; LMULMAX2-RV32-NEXT: vlse64.v v10, (a1), zero
-; LMULMAX2-RV32-NEXT: mv a1, sp
-; LMULMAX2-RV32-NEXT: vlse64.v v12, (a1), zero
-; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 4
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v14
-; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v12
+; LMULMAX2-RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
+; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; LMULMAX2-RV32-NEXT: vmul.vv v8, v8, v10
 ; LMULMAX2-RV32-NEXT: li a1, 56
 ; LMULMAX2-RV32-NEXT: vsrl.vx v8, v8, a1
 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX2-RV32-NEXT: addi sp, sp, 32
 ; LMULMAX2-RV32-NEXT: ret
 ;
 ; LMULMAX2-RV64-LABEL: ctpop_v4i64:
@@ -926,46 +905,40 @@
 ;
 ; LMULMAX1-RV32-LABEL: ctpop_v4i64:
 ; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: addi sp, sp, -32
-; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32
 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16
 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1)
 ; LMULMAX1-RV32-NEXT: lui a2, 349525
 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1365
-; LMULMAX1-RV32-NEXT: sw a2, 28(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 24(sp)
+; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 1
+; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10
+; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v11
 ; LMULMAX1-RV32-NEXT: lui a2, 209715
 ; LMULMAX1-RV32-NEXT: addi a2, a2, 819
-; LMULMAX1-RV32-NEXT: sw a2, 20(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 16(sp)
-; LMULMAX1-RV32-NEXT: lui a2, 61681
-; LMULMAX1-RV32-NEXT: addi a2, a2, -241
-; LMULMAX1-RV32-NEXT: sw a2, 12(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 8(sp)
-; LMULMAX1-RV32-NEXT: lui a2, 4112
-; LMULMAX1-RV32-NEXT: addi a2, a2, 257
-; LMULMAX1-RV32-NEXT: sw a2, 4(sp)
-; LMULMAX1-RV32-NEXT: sw a2, 0(sp)
-; LMULMAX1-RV32-NEXT: addi a2, sp, 24
-; LMULMAX1-RV32-NEXT: vlse64.v v10, (a2), zero
-; LMULMAX1-RV32-NEXT: addi a2, sp, 16
-; LMULMAX1-RV32-NEXT: vlse64.v v11, (a2), zero
-; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 1
-; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v10
-; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT: vand.vv v12, v9, v11
 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2
 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11
 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v12, v9
-; LMULMAX1-RV32-NEXT: addi a2, sp, 8
-; LMULMAX1-RV32-NEXT: vlse64.v v12, (a2), zero
-; LMULMAX1-RV32-NEXT: mv a2, sp
-; LMULMAX1-RV32-NEXT: vlse64.v v13, (a2), zero
-; LMULMAX1-RV32-NEXT: vsrl.vi v14, v9, 4
-; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v14
+; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 4
+; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT: lui a2, 61681
+; LMULMAX1-RV32-NEXT: addi a2, a2, -241
+; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v12, a2
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT: lui a2, 4112
+; LMULMAX1-RV32-NEXT: addi a2, a2, 257
+; LMULMAX1-RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2
+; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v13
 ; LMULMAX1-RV32-NEXT: li a2, 56
 ; LMULMAX1-RV32-NEXT: vsrl.vx v9, v9, a2
@@ -983,7 +956,6 @@
 ; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a2
 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT: vse64.v v9, (a1)
-; LMULMAX1-RV32-NEXT: addi sp, sp, 32
 ; LMULMAX1-RV32-NEXT: ret
 ;
 ; LMULMAX1-RV64-LABEL: ctpop_v4i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -1233,39 +1233,23 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v2i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1273,19 +1257,20 @@
 ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v2i64:
@@ -1332,39 +1317,23 @@ define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v2i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsub.vx v9, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v9
 ; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10
 ; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v10, v8, v9
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1372,19 +1341,20 @@
 ; RV32-NEXT: vadd.vv v8, v10, v8
 ; RV32-NEXT: vsrl.vi v9, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v9
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v2i64_unmasked:
@@ -1435,39 +1405,23 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v4i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
 ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1475,19 +1429,20 @@
 ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v4i64:
@@ -1534,39 +1489,23 @@ define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v4i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsub.vx v10, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v10
 ; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v10, v10, v12
 ; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v12, v8, v10
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1574,19 +1513,20 @@
 ; RV32-NEXT: vadd.vv v8, v12, v8
 ; RV32-NEXT: vsrl.vi v10, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v10
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v4i64_unmasked:
@@ -1637,39 +1577,23 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
 ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -1677,19 +1601,20 @@
 ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v8i64:
@@ -1736,39 +1661,23 @@ define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v8i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsub.vx v12, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v12
 ; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v12, v12, v16
 ; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v16, v8, v12
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -1776,19 +1685,20 @@
 ; RV32-NEXT: vadd.vv v8, v16, v8
 ; RV32-NEXT: vsrl.vi v12, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v12
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v8i64_unmasked:
@@ -3978,39 +3888,23 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v2i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -4018,19 +3912,20 @@
 ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v2i64:
@@ -4077,39 +3972,23 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsub.vx v9, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v9
 ; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10
 ; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v10, v8, v9
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -4117,19 +3996,20 @@
 ; RV32-NEXT: vadd.vv v8, v10, v8
 ; RV32-NEXT: vsrl.vi v9, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v9
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
@@ -4178,39 +4058,23 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v4i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
 ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -4218,19 +4082,20 @@
 ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v4i64:
@@ -4277,39 +4142,23 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsub.vx v10, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v10
 ; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v10, v10, v12
 ; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT:
vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -4317,19 +4166,20 @@ ; RV32-NEXT: vadd.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v4i64_unmasked: @@ -4378,39 +4228,23 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsub.vx v12, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v16, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -4418,19 +4252,20 @@ ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12, 
v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v8i64: @@ -4477,39 +4312,23 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsub.vx v12, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -4517,19 +4336,20 @@ ; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v8i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -456,52 +456,46 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV32I-LABEL: cttz_v2i64: ; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: addi sp, sp, -32 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32I-NEXT: li a1, 1 +; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: sw a1, 28(sp) -; 
LMULMAX2-RV32I-NEXT: sw a1, 24(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10 +; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: lui a1, 209715 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: sw a1, 20(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 16(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: lui a1, 61681 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: sw a1, 12(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 8(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: lui a1, 4112 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: sw a1, 4(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 0(sp) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: addi a1, sp, 24 -; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero -; LMULMAX2-RV32I-NEXT: addi a1, sp, 16 -; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 1 -; LMULMAX2-RV32I-NEXT: vand.vv v9, v11, v9 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vand.vv v9, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: addi a1, sp, 8 -; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero -; LMULMAX2-RV32I-NEXT: mv a1, sp -; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v11 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: li a1, 56 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: addi sp, sp, 32 ; LMULMAX2-RV32I-NEXT: ret ; ; LMULMAX2-RV64I-LABEL: cttz_v2i64: @@ -1115,52 +1109,46 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV32I-LABEL: cttz_v4i64: ; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: addi sp, sp, -32 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32I-NEXT: li a1, 1 +; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: sw a1, 28(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 24(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; 
LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12 +; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: lui a1, 209715 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: sw a1, 20(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 16(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: lui a1, 61681 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: sw a1, 12(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 8(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: lui a1, 4112 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: sw a1, 4(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 0(sp) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: addi a1, sp, 24 -; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV32I-NEXT: addi a1, sp, 16 -; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero -; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 1 -; LMULMAX2-RV32I-NEXT: vand.vv v10, v14, v10 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v12 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v12 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: addi a1, sp, 8 -; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV32I-NEXT: mv a1, sp -; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero -; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v14 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v12 +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: li a1, 56 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: addi sp, sp, 32 ; LMULMAX2-RV32I-NEXT: ret ; ; LMULMAX2-RV64I-LABEL: cttz_v4i64: @@ -1723,52 +1711,46 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v2i64: ; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: addi sp, sp, -32 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32I-NEXT: li a1, 1 +; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: sw a1, 28(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 24(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v9, v9, v10 +; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: lui a1, 209715 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; 
LMULMAX2-RV32I-NEXT: sw a1, 20(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 16(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v9 +; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 4 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: lui a1, 61681 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: sw a1, 12(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 8(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: lui a1, 4112 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: sw a1, 4(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 0(sp) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: addi a1, sp, 24 -; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero -; LMULMAX2-RV32I-NEXT: addi a1, sp, 16 -; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 1 -; LMULMAX2-RV32I-NEXT: vand.vv v9, v11, v9 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vand.vv v9, v8, v10 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v9, v8 -; LMULMAX2-RV32I-NEXT: addi a1, sp, 8 -; LMULMAX2-RV32I-NEXT: vlse64.v v9, (a1), zero -; LMULMAX2-RV32I-NEXT: mv a1, sp -; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV32I-NEXT: vsrl.vi v11, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v11 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: li a1, 56 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: addi sp, sp, 32 ; LMULMAX2-RV32I-NEXT: ret ; ; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v2i64: @@ -2342,52 +2324,46 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV32I-LABEL: cttz_zero_undef_v4i64: ; LMULMAX2-RV32I: # %bb.0: -; LMULMAX2-RV32I-NEXT: addi sp, sp, -32 ; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32I-NEXT: li a1, 1 +; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 -; LMULMAX2-RV32I-NEXT: sw a1, 28(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 24(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v10, v10, v12 +; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: lui a1, 209715 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 819 -; LMULMAX2-RV32I-NEXT: sw a1, 20(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 16(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; 
LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v12, v8, v10 +; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v12, v8 +; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 4 +; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: lui a1, 61681 ; LMULMAX2-RV32I-NEXT: addi a1, a1, -241 -; LMULMAX2-RV32I-NEXT: sw a1, 12(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 8(sp) +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: lui a1, 4112 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 257 -; LMULMAX2-RV32I-NEXT: sw a1, 4(sp) -; LMULMAX2-RV32I-NEXT: sw a1, 0(sp) -; LMULMAX2-RV32I-NEXT: li a1, 1 -; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: addi a1, sp, 24 -; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV32I-NEXT: addi a1, sp, 16 -; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero -; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 1 -; LMULMAX2-RV32I-NEXT: vand.vv v10, v14, v10 -; LMULMAX2-RV32I-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vand.vv v10, v8, v12 -; LMULMAX2-RV32I-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v12 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32I-NEXT: addi a1, sp, 8 -; LMULMAX2-RV32I-NEXT: vlse64.v v10, (a1), zero -; LMULMAX2-RV32I-NEXT: mv a1, sp -; LMULMAX2-RV32I-NEXT: vlse64.v v12, (a1), zero -; LMULMAX2-RV32I-NEXT: vsrl.vi v14, v8, 4 -; LMULMAX2-RV32I-NEXT: vadd.vv v8, v8, v14 -; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v12 +; LMULMAX2-RV32I-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32I-NEXT: vmul.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: li a1, 56 ; LMULMAX2-RV32I-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV32I-NEXT: vse64.v v8, (a0) -; LMULMAX2-RV32I-NEXT: addi sp, sp, 32 ; LMULMAX2-RV32I-NEXT: ret ; ; LMULMAX2-RV64I-LABEL: cttz_zero_undef_v4i64: