diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3559,9 +3559,11 @@
   if ((LoC >> 31) == HiC)
     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);

-  // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
+  // If vl is equal to VLMAX and Hi constant is equal to Lo, we could use
   // vmv.v.x whose EEW = 32 to lower it.
-  if (LoC == HiC && isAllOnesConstant(VL)) {
+  if (LoC == HiC && (isAllOnesConstant(VL) ||
+                     (isa<RegisterSDNode>(VL) &&
+                      cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))) {
     MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
     // TODO: if vl <= min(VLMAX), we can also do this. But we could not
     // access the subtarget here now.
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
@@ -1070,22 +1070,10 @@
 define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV32-LABEL: bitreverse_nxv1i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
 ; RV32-NEXT: lui a0, 1044480
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: lui a0, 61681
-; RV32-NEXT: addi a0, a0, -241
-; RV32-NEXT: sw a0, 28(sp)
-; RV32-NEXT: sw a0, 24(sp)
-; RV32-NEXT: lui a0, 209715
-; RV32-NEXT: addi a0, a0, 819
-; RV32-NEXT: sw a0, 20(sp)
-; RV32-NEXT: sw a0, 16(sp)
-; RV32-NEXT: lui a0, 349525
-; RV32-NEXT: addi a0, a0, 1365
-; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
@@ -1097,7 +1085,7 @@
 ; RV32-NEXT: vand.vx v10, v10, a2
 ; RV32-NEXT: vor.vv v9, v10, v9
 ; RV32-NEXT: vsrl.vi v10, v8, 24
-; RV32-NEXT: mv a3, sp
+; RV32-NEXT: addi a3, sp, 8
 ; RV32-NEXT: vlse64.v v11, (a3), zero
 ; RV32-NEXT: lui a3, 4080
 ; RV32-NEXT: vand.vx v10, v10, a3
@@ -1114,30 +1102,39 @@
 ; RV32-NEXT: vand.vv v8, v8, v11
 ; RV32-NEXT: vsll.vi v8, v8, 8
 ; RV32-NEXT: vor.vv v8, v12, v8
-; RV32-NEXT: addi a0, sp, 24
-; RV32-NEXT: vlse64.v v11, (a0), zero
 ; RV32-NEXT: vor.vv v8, v10, v8
 ; RV32-NEXT: vor.vv v8, v8, v9
 ; RV32-NEXT: vsrl.vi v9, v8, 4
-; RV32-NEXT: vand.vv v9, v9, v11
-; RV32-NEXT: vand.vv v8, v8, v11
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: lui a0, 61681
+; RV32-NEXT: addi a0, a0, -241
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a0
+; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v9, v9, v10
+; RV32-NEXT: vand.vv v8, v8, v10
 ; RV32-NEXT: vsll.vi v8, v8, 4
 ; RV32-NEXT: vor.vv v8, v9, v8
 ; RV32-NEXT: vsrl.vi v9, v8, 2
+; RV32-NEXT: lui a0, 209715
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a0
+; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10
 ; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vlse64.v v10, (a0), zero
 ; RV32-NEXT: vsll.vi v8, v8, 2
 ; RV32-NEXT: vor.vv v8, v9, v8
 ; RV32-NEXT: vsrl.vi v9, v8, 1
+; RV32-NEXT: lui a0, 349525
+; RV32-NEXT: addi a0, a0, 1365
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a0
+; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10
 ; RV32-NEXT: vand.vv v8, v8, v10
 ; RV32-NEXT: vadd.vv 
v8, v8, v8 ; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv1i64: @@ -1213,22 +1210,10 @@ define @bitreverse_nxv2i64( %va) { ; RV32-LABEL: bitreverse_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a0, 16(sp) -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma @@ -1240,7 +1225,7 @@ ; RV32-NEXT: vand.vx v12, v12, a2 ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: mv a3, sp +; RV32-NEXT: addi a3, sp, 8 ; RV32-NEXT: vlse64.v v14, (a3), zero ; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vand.vx v12, v12, a3 @@ -1257,30 +1242,39 @@ ; RV32-NEXT: vand.vv v8, v8, v14 ; RV32-NEXT: vsll.vi v8, v8, 8 ; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v14, (a0), zero ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vand.vv v10, v10, v14 -; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 2 +; RV32-NEXT: lui a0, 209715 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: lui a0, 349525 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv2i64: @@ -1356,22 +1350,10 @@ define @bitreverse_nxv4i64( %va) { ; RV32-LABEL: bitreverse_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: sw a0, 24(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a0, 16(sp) -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li 
a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma @@ -1383,7 +1365,7 @@ ; RV32-NEXT: vand.vx v16, v16, a2 ; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: mv a3, sp +; RV32-NEXT: addi a3, sp, 8 ; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vand.vx v16, v16, a3 @@ -1400,30 +1382,39 @@ ; RV32-NEXT: vand.vv v8, v8, v20 ; RV32-NEXT: vsll.vi v8, v8, 8 ; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v20, (a0), zero ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vand.vv v12, v12, v20 -; RV32-NEXT: vand.vv v8, v8, v20 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vand.vv v12, v12, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 2 +; RV32-NEXT: lui a0, 209715 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 +; RV32-NEXT: lui a0, 349525 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv4i64: @@ -1499,27 +1490,15 @@ define @bitreverse_nxv8i64( %va) { ; RV32-LABEL: bitreverse_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb -; RV32-NEXT: sw zero, 20(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 -; RV32-NEXT: sw a0, 16(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 44(sp) -; RV32-NEXT: sw a0, 40(sp) -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 36(sp) -; RV32-NEXT: sw a0, 32(sp) -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v8, a0 @@ -1529,58 +1508,67 @@ ; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: vand.vx v24, v24, a2 ; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vlse64.v v0, (a3), zero +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 
24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vand.vx v0, v0, a3 ; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: vsll.vx v24, v24, a1 +; RV32-NEXT: vand.vx v0, v8, a2 +; RV32-NEXT: vsll.vx v0, v0, a1 ; RV32-NEXT: vsll.vx v16, v8, a0 -; RV32-NEXT: vor.vv v24, v16, v24 -; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 ; RV32-NEXT: vand.vx v8, v8, a3 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 40 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 2 -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: lui a0, 209715 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: lui a0, 349525 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -2189,22 +2189,10 @@ define 
@vp_bitreverse_nxv1i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2218,7 +2206,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v8, a4, v0.t ; RV32-NEXT: vsll.vi v10, v10, 24, v0.t -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2238,33 +2226,36 @@ ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv1i64: @@ -2339,22 +2330,10 @@ define @vp_bitreverse_nxv1i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; 
RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2368,7 +2347,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v8, a4 ; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2388,33 +2367,36 @@ ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv1i64_unmasked: @@ -2493,22 +2475,10 @@ define @vp_bitreverse_nxv2i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2522,7 +2492,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v8, a4, v0.t ; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2542,33 +2512,36 @@ ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: 
vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv2i64: @@ -2643,22 +2616,10 @@ define @vp_bitreverse_nxv2i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2672,7 +2633,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v8, a4 ; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2692,33 +2653,36 @@ ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv2i64_unmasked: @@ -2797,22 +2761,10 @@ define @vp_bitreverse_nxv4i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2826,7 +2778,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v8, a4, v0.t ; RV32-NEXT: vsll.vi v20, v12, 24, v0.t -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2846,33 +2798,36 @@ ; RV32-NEXT: vor.vv v8, v8, v20, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv4i64: @@ -2947,22 +2902,10 @@ define @vp_bitreverse_nxv4i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) ; 
RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2976,7 +2919,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4 ; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2996,33 +2939,36 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv4i64_unmasked: @@ -3101,28 +3047,16 @@ define @vp_bitreverse_nxv7i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv7i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb -; RV32-NEXT: sw zero, 20(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; 
RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1, v0.t @@ -3132,7 +3066,7 @@ ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v24, v24, a3, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a4, sp, 48 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4, v0.t @@ -3140,15 +3074,15 @@ ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v8, v16, v0.t @@ -3156,22 +3090,22 @@ ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t ; RV32-NEXT: vand.vx v16, v16, a2, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: vand.vx v24, v24, a4, v0.t @@ -3179,41 +3113,44 @@ ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, 
e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t @@ -3223,7 +3160,7 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv7i64: @@ -3315,27 +3252,15 @@ define @vp_bitreverse_nxv7i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv7i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb -; RV32-NEXT: sw zero, 20(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -3345,19 +3270,19 @@ ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v24, v24, a3 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 48 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4 ; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill @@ -3371,31 +3296,34 @@ ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; 
RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 @@ -3404,7 +3332,7 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv7i64_unmasked: @@ -3483,28 +3411,16 @@ define @vp_bitreverse_nxv8i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb -; RV32-NEXT: sw zero, 20(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1, v0.t @@ -3514,7 +3430,7 @@ ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v24, v24, a3, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a4, sp, 48 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4, v0.t @@ -3522,15 +3438,15 @@ ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v8, v16, v0.t @@ -3538,22 +3454,22 @@ ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t ; RV32-NEXT: vand.vx v16, v16, a2, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: vand.vx v24, v24, a4, v0.t @@ -3561,41 +3477,44 @@ ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t @@ -3605,7 +3524,7 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv8i64: @@ -3697,27 +3616,15 @@ define @vp_bitreverse_nxv8i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 
0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb -; RV32-NEXT: sw zero, 20(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -3727,19 +3634,19 @@ ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v24, v24, a3 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 48 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4 ; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill @@ -3753,31 +3660,34 @@ ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 @@ -3786,7 +3696,7 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv8i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -1509,24 +1509,6 @@ define @ctlz_nxv1i64( %va) { ; RV32I-LABEL: ctlz_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32I-NEXT: vsrl.vi v9, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v9 @@ -1542,28 +1524,39 @@ ; RV32I-NEXT: vsrl.vx v9, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v9 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: vlse64.v v9, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 -; RV32I-NEXT: vlse64.v v10, (a0), zero -; RV32I-NEXT: vsrl.vi v11, v8, 1 -; RV32I-NEXT: vand.vv v9, v11, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v10, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v9, v9, v10 ; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: vand.vv v9, v8, v10 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v10, v8, v9 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v9, (a0), zero -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: vlse64.v v10, (a0), zero -; RV32I-NEXT: vsrl.vi v11, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v11 ; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vmul.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v9 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v9 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_nxv1i64: @@ -1656,24 +1649,6 @@ define @ctlz_nxv2i64( %va) { ; RV32I-LABEL: ctlz_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; RV32I-NEXT: vsrl.vi 
v10, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v10 @@ -1689,28 +1664,39 @@ ; RV32I-NEXT: vsrl.vx v10, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v10 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: vlse64.v v10, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 -; RV32I-NEXT: vlse64.v v12, (a0), zero -; RV32I-NEXT: vsrl.vi v14, v8, 1 -; RV32I-NEXT: vand.vv v10, v14, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v12, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v10, v10, v12 ; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: vand.vv v10, v8, v12 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v12, v8, v10 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vadd.vv v8, v10, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v10, (a0), zero -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: vlse64.v v12, (a0), zero -; RV32I-NEXT: vsrl.vi v14, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v14 ; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vmul.vv v8, v8, v12 +; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v10 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v10 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_nxv2i64: @@ -1803,24 +1789,6 @@ define @ctlz_nxv4i64( %va) { ; RV32I-LABEL: ctlz_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32I-NEXT: vsrl.vi v12, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v12 @@ -1836,28 +1804,39 @@ ; RV32I-NEXT: vsrl.vx v12, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v12 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: vlse64.v v12, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 -; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: vsrl.vi v20, v8, 1 -; RV32I-NEXT: vand.vv v12, v20, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32I-NEXT: vmv.v.x v16, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vand.vv v12, v12, v16 ; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: vand.vv v12, v8, v16 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32I-NEXT: 
vmv.v.x v12, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vand.vv v16, v8, v12 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v12, (a0), zero -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: vsrl.vi v20, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v20 ; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vmul.vv v8, v8, v16 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vsrl.vi v12, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v12 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32I-NEXT: vmv.v.x v12, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32I-NEXT: vmv.v.x v12, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v12 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_nxv4i64: @@ -1950,24 +1929,6 @@ define @ctlz_nxv8i64( %va) { ; RV32I-LABEL: ctlz_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32I-NEXT: vsrl.vi v16, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v16 @@ -1983,28 +1944,39 @@ ; RV32I-NEXT: vsrl.vx v16, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v16 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 -; RV32I-NEXT: vlse64.v v24, (a0), zero -; RV32I-NEXT: vsrl.vi v0, v8, 1 -; RV32I-NEXT: vand.vv v16, v0, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32I-NEXT: vmv.v.x v24, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vand.vv v16, v16, v24 ; RV32I-NEXT: vsub.vv v8, v8, v16 -; RV32I-NEXT: vand.vv v16, v8, v24 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32I-NEXT: vmv.v.x v16, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vand.vv v24, v8, v16 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v24 -; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: vlse64.v v24, (a0), zero -; RV32I-NEXT: vsrl.vi v0, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v0 ; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: vmul.vv v8, v8, v24 +; RV32I-NEXT: vadd.vv v8, v24, v8 +; RV32I-NEXT: vsrl.vi v16, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32I-NEXT: vmv.v.x v16, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 4112 +; 
RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32I-NEXT: vmv.v.x v16, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v16 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_nxv8i64: @@ -3517,24 +3489,6 @@ define @ctlz_zero_undef_nxv1i64( %va) { ; RV32I-LABEL: ctlz_zero_undef_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32I-NEXT: vsrl.vi v9, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v9 @@ -3550,28 +3504,39 @@ ; RV32I-NEXT: vsrl.vx v9, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v9 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: vlse64.v v9, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 -; RV32I-NEXT: vlse64.v v10, (a0), zero -; RV32I-NEXT: vsrl.vi v11, v8, 1 -; RV32I-NEXT: vand.vv v9, v11, v9 +; RV32I-NEXT: vsrl.vi v9, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v10, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v9, v9, v10 ; RV32I-NEXT: vsub.vv v8, v8, v9 -; RV32I-NEXT: vand.vv v9, v8, v10 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v10, v8, v9 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vadd.vv v8, v9, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v9, (a0), zero -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: vlse64.v v10, (a0), zero -; RV32I-NEXT: vsrl.vi v11, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v11 ; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vmul.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vsrl.vi v9, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v9 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32I-NEXT: vmv.v.x v9, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v9 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_zero_undef_nxv1i64: @@ -3659,24 +3624,6 @@ define @ctlz_zero_undef_nxv2i64( %va) { ; RV32I-LABEL: ctlz_zero_undef_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; 
RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; RV32I-NEXT: vsrl.vi v10, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v10 @@ -3692,28 +3639,39 @@ ; RV32I-NEXT: vsrl.vx v10, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v10 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: vlse64.v v10, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 -; RV32I-NEXT: vlse64.v v12, (a0), zero -; RV32I-NEXT: vsrl.vi v14, v8, 1 -; RV32I-NEXT: vand.vv v10, v14, v10 +; RV32I-NEXT: vsrl.vi v10, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v12, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v10, v10, v12 ; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: vand.vv v10, v8, v12 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v12, v8, v10 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vadd.vv v8, v10, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v10, (a0), zero -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: vlse64.v v12, (a0), zero -; RV32I-NEXT: vsrl.vi v14, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v14 ; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vmul.vv v8, v8, v12 +; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: vsrl.vi v10, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v10 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32I-NEXT: vmv.v.x v10, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v10 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_zero_undef_nxv2i64: @@ -3801,24 +3759,6 @@ define @ctlz_zero_undef_nxv4i64( %va) { ; RV32I-LABEL: ctlz_zero_undef_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32I-NEXT: vsrl.vi v12, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v12 @@ -3834,28 +3774,39 @@ ; RV32I-NEXT: vsrl.vx v12, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v12 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: vlse64.v v12, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 -; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: vsrl.vi v20, v8, 1 -; RV32I-NEXT: vand.vv v12, v20, v12 +; RV32I-NEXT: vsrl.vi v12, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32I-NEXT: vmv.v.x v16, a0 +; 
RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vand.vv v12, v12, v16 ; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: vand.vv v12, v8, v16 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32I-NEXT: vmv.v.x v12, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vand.vv v16, v8, v12 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: vadd.vv v8, v12, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v12, (a0), zero -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: vsrl.vi v20, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v20 ; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vmul.vv v8, v8, v16 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vsrl.vi v12, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v12 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32I-NEXT: vmv.v.x v12, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32I-NEXT: vmv.v.x v12, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v12 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_zero_undef_nxv4i64: @@ -3943,24 +3894,6 @@ define @ctlz_zero_undef_nxv8i64( %va) { ; RV32I-LABEL: ctlz_zero_undef_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 -; RV32I-NEXT: lui a0, 349525 -; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lui a0, 61681 -; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: lui a0, 4112 -; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32I-NEXT: vsrl.vi v16, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v16 @@ -3976,28 +3909,39 @@ ; RV32I-NEXT: vsrl.vx v16, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v16 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 -; RV32I-NEXT: vlse64.v v24, (a0), zero -; RV32I-NEXT: vsrl.vi v0, v8, 1 -; RV32I-NEXT: vand.vv v16, v0, v16 +; RV32I-NEXT: vsrl.vi v16, v8, 1 +; RV32I-NEXT: lui a0, 349525 +; RV32I-NEXT: addi a0, a0, 1365 +; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32I-NEXT: vmv.v.x v24, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vand.vv v16, v16, v24 ; RV32I-NEXT: vsub.vv v8, v8, v16 -; RV32I-NEXT: vand.vv v16, v8, v24 +; RV32I-NEXT: lui a0, 209715 +; RV32I-NEXT: addi a0, a0, 819 +; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32I-NEXT: vmv.v.x v16, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vand.vv v24, v8, v16 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v24 -; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: vlse64.v v24, (a0), zero -; RV32I-NEXT: vsrl.vi v0, v8, 4 -; RV32I-NEXT: vadd.vv v8, v8, v0 ; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: vmul.vv v8, v8, v24 +; RV32I-NEXT: vadd.vv v8, v24, v8 +; RV32I-NEXT: 
vsrl.vi v16, v8, 4 +; RV32I-NEXT: vadd.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 61681 +; RV32I-NEXT: addi a0, a0, -241 +; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32I-NEXT: vmv.v.x v16, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: lui a0, 4112 +; RV32I-NEXT: addi a0, a0, 257 +; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32I-NEXT: vmv.v.x v16, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vmul.vv v8, v8, v16 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_zero_undef_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll @@ -862,47 +862,40 @@ define <vscale x 1 x i64> @ctpop_nxv1i64(<vscale x 1 x i64> %va) { ; RV32-LABEL: ctpop_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vsub.vv v8, v8, v9 ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a0, 16(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vand.vv v10, v8, v9 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: vadd.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v9, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v9 ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vand.vv v8, v8, v9 ; RV32-NEXT: lui a0, 4112 ; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsrl.vi v11, v8, 1 -; RV32-NEXT: vand.vv v9, v11, v9 -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: vand.vv v9, v8, v10 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vadd.vv v8, v9, v8 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: mv a0, sp -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsrl.vi v11, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v11 -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: ctpop_nxv1i64: @@ -952,47 +945,40 @@ define <vscale x 2 x i64> @ctpop_nxv2i64(<vscale x 2 x i64> %va) { ; RV32-LABEL: ctpop_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m2,
ta, ma +; RV32-NEXT: vmv.v.x v12, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vsub.vv v8, v8, v10 ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a0, 16(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32-NEXT: vand.vv v12, v8, v10 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v10, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v10 ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: lui a0, 4112 ; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a0 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsrl.vi v14, v8, 1 -; RV32-NEXT: vand.vv v10, v14, v10 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vadd.vv v8, v10, v8 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: mv a0, sp -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsrl.vi v14, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v14 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: ctpop_nxv2i64: @@ -1042,47 +1028,40 @@ define @ctpop_nxv4i64( %va) { ; RV32-LABEL: ctpop_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vand.vv v12, v12, v16 +; RV32-NEXT: vsub.vv v8, v8, v12 ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a0, 16(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vand.vv v16, v8, v12 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v12, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v12 ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: lui a0, 4112 ; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a0 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vlse64.v v16, 
(a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 1 -; RV32-NEXT: vand.vv v12, v20, v12 -; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v12, v8 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: mv a0, sp -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsrl.vi v20, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v20 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: ctpop_nxv4i64: @@ -1132,47 +1111,40 @@ define <vscale x 8 x i64> @ctpop_nxv8i64(<vscale x 8 x i64> %va) { ; RV32-LABEL: ctpop_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a0, 16(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: lui a0, 4112 ; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: mv a0, sp -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: ctpop_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -1759,35 +1759,19 @@ define <vscale x 1 x i64> @vp_ctpop_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1,
819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v10, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -1795,19 +1779,20 @@ ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv1i64: @@ -1856,35 +1841,19 @@ define @vp_ctpop_nxv1i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v10, v8, v9 ; RV32-NEXT: vsrl.vi v8, v8, 2 
@@ -1892,19 +1861,20 @@ ; RV32-NEXT: vadd.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv1i64_unmasked: @@ -1957,35 +1927,19 @@ define @vp_ctpop_nxv2i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v12, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -1993,19 +1947,20 @@ ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv2i64: @@ -2054,35 +2009,19 @@ define @vp_ctpop_nxv2i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; 
RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -2090,19 +2029,20 @@ ; RV32-NEXT: vadd.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv2i64_unmasked: @@ -2155,35 +2095,19 @@ define @vp_ctpop_nxv4i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: 
vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v16, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -2191,19 +2115,20 @@ ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv4i64: @@ -2252,35 +2177,19 @@ define @vp_ctpop_nxv4i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsub.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -2288,19 +2197,20 @@ ; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: 
vp_ctpop_nxv4i64_unmasked: @@ -2353,35 +2263,19 @@ define @vp_ctpop_nxv7i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv7i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -2389,19 +2283,20 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv7i64: @@ -2450,35 +2345,19 @@ define @vp_ctpop_nxv7i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv7i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: 
vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -2486,19 +2365,20 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv7i64_unmasked: @@ -2551,35 +2431,19 @@ define @vp_ctpop_nxv8i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -2587,19 +2451,20 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; 
RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv8i64: @@ -2648,35 +2513,19 @@ define @vp_ctpop_nxv8i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -2684,19 +2533,20 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv8i64_unmasked: @@ -2749,203 +2599,238 @@ define @vp_ctpop_nxv16i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb -; RV32-NEXT: vmv1r.v v1, v0 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 -; 
RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a2, a1, 3 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a2 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: sw a2, 44(sp) -; RV32-NEXT: sw a2, 40(sp) -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 36(sp) -; RV32-NEXT: sw a2, 32(sp) -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: sw a2, 28(sp) -; RV32-NEXT: sw a2, 24(sp) -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: sw a2, 20(sp) -; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 349525 +; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v16, v24, v16, v0.t +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 
-; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v8, v0.t +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: lui a3, 4112 +; RV32-NEXT: addi a3, a3, 257 +; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v16, v24, v16, v0.t +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v16, a2, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; 
 ; RV32-NEXT: bltu a0, a1, .LBB46_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a0, a1
 ; RV32-NEXT: .LBB46_2:
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: addi a0, sp, 48
+; RV32-NEXT: vmv1r.v v0, v24
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT: vand.vv v8, v16, v8, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: li a1, 40
 ; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: vand.vv v16, v16, v8, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: li a1, 24
 ; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 4
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 3
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 48
+; RV32-NEXT: li a1, 56
 ; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv16i64:
@@ -3061,28 +2946,12 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv16i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: slli a1, a1, 5
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
-; RV32-NEXT: sw a1, 40(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
-; RV32-NEXT: sw a1, 32(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: sub a2, a0, a1
 ; RV32-NEXT: sltu a3, a0, a2
@@ -3090,91 +2959,95 @@
 ; RV32-NEXT: and a2, a3, a2
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: addi a3, sp, 40
-; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a3), zero
+; RV32-NEXT: lui a3, 349525
+; RV32-NEXT: addi a3, a3, 1365
+; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v0, a3
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: csrr a3, vlenb
 ; RV32-NEXT: li a4, 24
 ; RV32-NEXT: mul a3, a3, a4
 ; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
+; RV32-NEXT: addi a3, a3, 16
 ; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: vand.vv v24, v24, v0
-; RV32-NEXT: vsub.vv v16, v16, v24
-; RV32-NEXT: addi a3, sp, 32
-; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v0, (a3), zero
+; RV32-NEXT: vsub.vv v24, v16, v24
+; RV32-NEXT: lui a3, 209715
+; RV32-NEXT: addi a3, a3, 819
+; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v0, a3
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v16, v0
-; RV32-NEXT: vsrl.vi v16, v16, 2
-; RV32-NEXT: vand.vv v16, v16, v0
-; RV32-NEXT: vadd.vv v16, v24, v16
-; RV32-NEXT: vsrl.vi v24, v16, 4
+; RV32-NEXT: vand.vv v16, v24, v0
+; RV32-NEXT: vsrl.vi v24, v24, 2
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v24, v24, v0
 ; RV32-NEXT: vadd.vv v24, v16, v24
-; RV32-NEXT: addi a3, sp, 24
-; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a3), zero
+; RV32-NEXT: vsrl.vi v16, v24, 4
+; RV32-NEXT: vadd.vv v16, v24, v16
+; RV32-NEXT: lui a3, 61681
+; RV32-NEXT: addi a3, a3, -241
+; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v24, a3
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: slli a3, a3, 3
 ; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v16, v24, v16
-; RV32-NEXT: addi a3, sp, 16
-; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a3), zero
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v16, v16, v24
+; RV32-NEXT: lui a3, 4112
+; RV32-NEXT: addi a3, a3, 257
+; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v24, a3
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 3
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 48
+; RV32-NEXT: addi a2, sp, 16
 ; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT: vmul.vv v16, v16, v24
 ; RV32-NEXT: li a2, 56
 ; RV32-NEXT: vsrl.vx v16, v16, a2
-; RV32-NEXT: addi a3, sp, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: bltu a0, a1, .LBB47_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a0, a1
 ; RV32-NEXT: .LBB47_2:
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
+; RV32-NEXT: vsrl.vi v24, v8, 1
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: li a1, 24
 ; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: vand.vv v16, v8, v0
-; RV32-NEXT: vsrl.vi v8, v8, 2
-; RV32-NEXT: vand.vv v8, v8, v0
-; RV32-NEXT: vadd.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vadd.vv v8, v8, v16
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v24, v24, v0
+; RV32-NEXT: vsub.vv v24, v8, v24
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 4
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v8, v24, v0
+; RV32-NEXT: vsrl.vi v24, v24, 2
+; RV32-NEXT: vand.vv v24, v24, v0
+; RV32-NEXT: vadd.vv v8, v8, v24
+; RV32-NEXT: vsrl.vi v24, v8, 4
+; RV32-NEXT: vadd.vv v8, v8, v24
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 3
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v8, v8, v24
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vmul.vv v8, v8, v24
 ; RV32-NEXT: vsrl.vx v8, v8, a2
-; RV32-NEXT: addi a0, sp, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 5
 ; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv16i64_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1448,51 +1448,44 @@ define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV32I-LABEL: cttz_nxv1i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: sw a0, 20(sp)
-; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: sw a0, 4(sp)
-; RV32I-NEXT: sw a0, 0(sp)
 ; RV32I-NEXT: li a0, 1
 ; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma
 ; RV32I-NEXT: vsub.vx v9, v8, a0
 ; RV32I-NEXT: vnot.v v8, v8
 ; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: addi a0, sp, 24
-; RV32I-NEXT: vlse64.v v9, (a0), zero
-; RV32I-NEXT: addi a0, sp, 16
-; RV32I-NEXT: vlse64.v v10, (a0), zero
-; RV32I-NEXT: vsrl.vi v11, v8, 1
-; RV32I-NEXT: vand.vv v9, v11, v9
+; RV32I-NEXT: vsrl.vi v9, v8, 1
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32I-NEXT: vmv.v.x v10, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT: vand.vv v9, v9, v10
 ; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: vand.vv v9, v8, v10
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32I-NEXT: vmv.v.x v9, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT: vand.vv v10, v8, v9
 ; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: addi a0, sp, 8
-; RV32I-NEXT: vlse64.v v9, (a0), zero
-; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: vlse64.v v10, (a0), zero
-; RV32I-NEXT: vsrl.vi v11, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v11
 ; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vmul.vv v8, v8, v10
+; RV32I-NEXT: vadd.vv v8, v10, v8
+; RV32I-NEXT: vsrl.vi v9, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v9
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32I-NEXT: vmv.v.x v9, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT: vand.vv v8, v8, v9
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32I-NEXT: vmv.v.x v9, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT: vmul.vv v8, v8, v9
 ; RV32I-NEXT: li a0, 56
 ; RV32I-NEXT: vsrl.vx v8, v8, a0
-; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: cttz_nxv1i64:
@@ -1618,51 +1611,44 @@ define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV32I-LABEL: cttz_nxv2i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: sw a0, 20(sp)
-; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: sw a0, 4(sp)
-; RV32I-NEXT: sw a0, 0(sp)
 ; RV32I-NEXT: li a0, 1
 ; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma
 ; RV32I-NEXT: vsub.vx v10, v8, a0
 ; RV32I-NEXT: vnot.v v8, v8
 ; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: addi a0, sp, 24
-; RV32I-NEXT: vlse64.v v10, (a0), zero
-; RV32I-NEXT: addi a0, sp, 16
-; RV32I-NEXT: vlse64.v v12, (a0), zero
-; RV32I-NEXT: vsrl.vi v14, v8, 1
-; RV32I-NEXT: vand.vv v10, v14, v10
+; RV32I-NEXT: vsrl.vi v10, v8, 1
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32I-NEXT: vmv.v.x v12, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT: vand.vv v10, v10, v12
 ; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: vand.vv v10, v8, v12
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32I-NEXT: vmv.v.x v10, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT: vand.vv v12, v8, v10
 ; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: addi a0, sp, 8
-; RV32I-NEXT: vlse64.v v10, (a0), zero
-; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: vlse64.v v12, (a0), zero
-; RV32I-NEXT: vsrl.vi v14, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v14
 ; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vmul.vv v8, v8, v12
+; RV32I-NEXT: vadd.vv v8, v12, v8
+; RV32I-NEXT: vsrl.vi v10, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v10
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32I-NEXT: vmv.v.x v10, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT: vand.vv v8, v8, v10
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32I-NEXT: vmv.v.x v10, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT: vmul.vv v8, v8, v10
 ; RV32I-NEXT: li a0, 56
 ; RV32I-NEXT: vsrl.vx v8, v8, a0
-; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: cttz_nxv2i64:
@@ -1788,51 +1774,44 @@ define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV32I-LABEL: cttz_nxv4i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: sw a0, 20(sp)
-; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: sw a0, 4(sp)
-; RV32I-NEXT: sw a0, 0(sp)
 ; RV32I-NEXT: li a0, 1
 ; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma
 ; RV32I-NEXT: vsub.vx v12, v8, a0
 ; RV32I-NEXT: vnot.v v8, v8
 ; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: addi a0, sp, 24
-; RV32I-NEXT: vlse64.v v12, (a0), zero
-; RV32I-NEXT: addi a0, sp, 16
-; RV32I-NEXT: vlse64.v v16, (a0), zero
-; RV32I-NEXT: vsrl.vi v20, v8, 1
-; RV32I-NEXT: vand.vv v12, v20, v12
+; RV32I-NEXT: vsrl.vi v12, v8, 1
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32I-NEXT: vmv.v.x v16, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT: vand.vv v12, v12, v16
 ; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: vand.vv v12, v8, v16
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32I-NEXT: vmv.v.x v12, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT: vand.vv v16, v8, v12
 ; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vv v8, v8, v16
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: addi a0, sp, 8
-; RV32I-NEXT: vlse64.v v12, (a0), zero
-; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: vlse64.v v16, (a0), zero
-; RV32I-NEXT: vsrl.vi v20, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v20
 ; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: vmul.vv v8, v8, v16
+; RV32I-NEXT: vadd.vv v8, v16, v8
+; RV32I-NEXT: vsrl.vi v12, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v12
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32I-NEXT: vmv.v.x v12, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT: vand.vv v8, v8, v12
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32I-NEXT: vmv.v.x v12, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT: vmul.vv v8, v8, v12
 ; RV32I-NEXT: li a0, 56
 ; RV32I-NEXT: vsrl.vx v8, v8, a0
-; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: cttz_nxv4i64:
@@ -1958,51 +1937,44 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-LABEL: cttz_nxv8i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: sw a0, 20(sp)
-; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: sw a0, 4(sp)
-; RV32I-NEXT: sw a0, 0(sp)
 ; RV32I-NEXT: li a0, 1
 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32I-NEXT: vsub.vx v16, v8, a0
 ; RV32I-NEXT: vnot.v v8, v8
 ; RV32I-NEXT: vand.vv v8, v8, v16
-; RV32I-NEXT: addi a0, sp, 24
-; RV32I-NEXT: vlse64.v v16, (a0), zero
-; RV32I-NEXT: addi a0, sp, 16
-; RV32I-NEXT: vlse64.v v24, (a0), zero
-; RV32I-NEXT: vsrl.vi v0, v8, 1
-; RV32I-NEXT: vand.vv v16, v0, v16
+; RV32I-NEXT: vsrl.vi v16, v8, 1
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT: vmv.v.x v24, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT: vand.vv v16, v16, v24
 ; RV32I-NEXT: vsub.vv v8, v8, v16
-; RV32I-NEXT: vand.vv v16, v8, v24
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT: vmv.v.x v16, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT: vand.vv v24, v8, v16
 ; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vv v8, v8, v24
-; RV32I-NEXT: vadd.vv v8, v16, v8
-; RV32I-NEXT: addi a0, sp, 8
-; RV32I-NEXT: vlse64.v v16, (a0), zero
-; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: vlse64.v v24, (a0), zero
-; RV32I-NEXT: vsrl.vi v0, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v0
 ; RV32I-NEXT: vand.vv v8, v8, v16
-; RV32I-NEXT: vmul.vv v8, v8, v24
+; RV32I-NEXT: vadd.vv v8, v24, v8
+; RV32I-NEXT: vsrl.vi v16, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v16
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT: vmv.v.x v16, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT: vand.vv v8, v8, v16
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT: vmv.v.x v16, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT: vmul.vv v8, v8, v16
 ; RV32I-NEXT: li a0, 56
 ; RV32I-NEXT: vsrl.vx v8, v8, a0
-; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: cttz_nxv8i64:
@@ -3467,51 +3439,44 @@ define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV32I-LABEL: cttz_zero_undef_nxv1i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: sw a0, 20(sp)
-; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: sw a0, 4(sp)
-; RV32I-NEXT: sw a0, 0(sp)
 ; RV32I-NEXT: li a0, 1
 ; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma
 ; RV32I-NEXT: vsub.vx v9, v8, a0
 ; RV32I-NEXT: vnot.v v8, v8
 ; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: addi a0, sp, 24
-; RV32I-NEXT: vlse64.v v9, (a0), zero
-; RV32I-NEXT: addi a0, sp, 16
-; RV32I-NEXT: vlse64.v v10, (a0), zero
-; RV32I-NEXT: vsrl.vi v11, v8, 1
-; RV32I-NEXT: vand.vv v9, v11, v9
+; RV32I-NEXT: vsrl.vi v9, v8, 1
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32I-NEXT: vmv.v.x v10, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT: vand.vv v9, v9, v10
 ; RV32I-NEXT: vsub.vv v8, v8, v9
-; RV32I-NEXT: vand.vv v9, v8, v10
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32I-NEXT: vmv.v.x v9, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT: vand.vv v10, v8, v9
 ; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vadd.vv v8, v9, v8
-; RV32I-NEXT: addi a0, sp, 8
-; RV32I-NEXT: vlse64.v v9, (a0), zero
-; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: vlse64.v v10, (a0), zero
-; RV32I-NEXT: vsrl.vi v11, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v11
 ; RV32I-NEXT: vand.vv v8, v8, v9
-; RV32I-NEXT: vmul.vv v8, v8, v10
+; RV32I-NEXT: vadd.vv v8, v10, v8
+; RV32I-NEXT: vsrl.vi v9, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v9
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32I-NEXT: vmv.v.x v9, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT: vand.vv v8, v8, v9
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32I-NEXT: vmv.v.x v9, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT: vmul.vv v8, v8, v9
 ; RV32I-NEXT: li a0, 56
 ; RV32I-NEXT: vsrl.vx v8, v8, a0
-; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: cttz_zero_undef_nxv1i64:
@@ -3594,51 +3559,44 @@ define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV32I-LABEL: cttz_zero_undef_nxv2i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: sw a0, 20(sp)
-; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: sw a0, 4(sp)
-; RV32I-NEXT: sw a0, 0(sp)
 ; RV32I-NEXT: li a0, 1
 ; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma
 ; RV32I-NEXT: vsub.vx v10, v8, a0
 ; RV32I-NEXT: vnot.v v8, v8
 ; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: addi a0, sp, 24
-; RV32I-NEXT: vlse64.v v10, (a0), zero
-; RV32I-NEXT: addi a0, sp, 16
-; RV32I-NEXT: vlse64.v v12, (a0), zero
-; RV32I-NEXT: vsrl.vi v14, v8, 1
-; RV32I-NEXT: vand.vv v10, v14, v10
+; RV32I-NEXT: vsrl.vi v10, v8, 1
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32I-NEXT: vmv.v.x v12, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT: vand.vv v10, v10, v12
 ; RV32I-NEXT: vsub.vv v8, v8, v10
-; RV32I-NEXT: vand.vv v10, v8, v12
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32I-NEXT: vmv.v.x v10, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT: vand.vv v12, v8, v10
 ; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: vadd.vv v8, v10, v8
-; RV32I-NEXT: addi a0, sp, 8
-; RV32I-NEXT: vlse64.v v10, (a0), zero
-; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: vlse64.v v12, (a0), zero
-; RV32I-NEXT: vsrl.vi v14, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v14
 ; RV32I-NEXT: vand.vv v8, v8, v10
-; RV32I-NEXT: vmul.vv v8, v8, v12
+; RV32I-NEXT: vadd.vv v8, v12, v8
+; RV32I-NEXT: vsrl.vi v10, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v10
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32I-NEXT: vmv.v.x v10, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT: vand.vv v8, v8, v10
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32I-NEXT: vmv.v.x v10, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT: vmul.vv v8, v8, v10
 ; RV32I-NEXT: li a0, 56
 ; RV32I-NEXT: vsrl.vx v8, v8, a0
-; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: cttz_zero_undef_nxv2i64:
@@ -3721,51 +3679,44 @@ define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV32I-LABEL: cttz_zero_undef_nxv4i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: sw a0, 20(sp)
-; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: sw a0, 4(sp)
-; RV32I-NEXT: sw a0, 0(sp)
 ; RV32I-NEXT: li a0, 1
 ; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma
 ; RV32I-NEXT: vsub.vx v12, v8, a0
 ; RV32I-NEXT: vnot.v v8, v8
 ; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: addi a0, sp, 24
-; RV32I-NEXT: vlse64.v v12, (a0), zero
-; RV32I-NEXT: addi a0, sp, 16
-; RV32I-NEXT: vlse64.v v16, (a0), zero
-; RV32I-NEXT: vsrl.vi v20, v8, 1
-; RV32I-NEXT: vand.vv v12, v20, v12
+; RV32I-NEXT: vsrl.vi v12, v8, 1
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32I-NEXT: vmv.v.x v16, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT: vand.vv v12, v12, v16
 ; RV32I-NEXT: vsub.vv v8, v8, v12
-; RV32I-NEXT: vand.vv v12, v8, v16
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32I-NEXT: vmv.v.x v12, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT: vand.vv v16, v8, v12
 ; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vv v8, v8, v16
-; RV32I-NEXT: vadd.vv v8, v12, v8
-; RV32I-NEXT: addi a0, sp, 8
-; RV32I-NEXT: vlse64.v v12, (a0), zero
-; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: vlse64.v v16, (a0), zero
-; RV32I-NEXT: vsrl.vi v20, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v20
 ; RV32I-NEXT: vand.vv v8, v8, v12
-; RV32I-NEXT: vmul.vv v8, v8, v16
+; RV32I-NEXT: vadd.vv v8, v16, v8
+; RV32I-NEXT: vsrl.vi v12, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v12
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32I-NEXT: vmv.v.x v12, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT: vand.vv v8, v8, v12
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32I-NEXT: vmv.v.x v12, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT: vmul.vv v8, v8, v12
 ; RV32I-NEXT: li a0, 56
 ; RV32I-NEXT: vsrl.vx v8, v8, a0
-; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: cttz_zero_undef_nxv4i64:
@@ -3848,51 +3799,44 @@ define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-LABEL: cttz_zero_undef_nxv8i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
-; RV32I-NEXT: lui a0, 349525
-; RV32I-NEXT: addi a0, a0, 1365
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: lui a0, 209715
-; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: sw a0, 20(sp)
-; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: lui a0, 61681
-; RV32I-NEXT: addi a0, a0, -241
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
-; RV32I-NEXT: lui a0, 4112
-; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: sw a0, 4(sp)
-; RV32I-NEXT: sw a0, 0(sp)
 ; RV32I-NEXT: li a0, 1
 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32I-NEXT: vsub.vx v16, v8, a0
 ; RV32I-NEXT: vnot.v v8, v8
 ; RV32I-NEXT: vand.vv v8, v8, v16
-; RV32I-NEXT: addi a0, sp, 24
-; RV32I-NEXT: vlse64.v v16, (a0), zero
-; RV32I-NEXT: addi a0, sp, 16
-; RV32I-NEXT: vlse64.v v24, (a0), zero
-; RV32I-NEXT: vsrl.vi v0, v8, 1
-; RV32I-NEXT: vand.vv v16, v0, v16
+; RV32I-NEXT: vsrl.vi v16, v8, 1
+; RV32I-NEXT: lui a0, 349525
+; RV32I-NEXT: addi a0, a0, 1365
+; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT: vmv.v.x v24, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT: vand.vv v16, v16, v24
 ; RV32I-NEXT: vsub.vv v8, v8, v16
-; RV32I-NEXT: vand.vv v16, v8, v24
+; RV32I-NEXT: lui a0, 209715
+; RV32I-NEXT: addi a0, a0, 819
+; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT: vmv.v.x v16, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT: vand.vv v24, v8, v16
 ; RV32I-NEXT: vsrl.vi v8, v8, 2
-; RV32I-NEXT: vand.vv v8, v8, v24
-; RV32I-NEXT: vadd.vv v8, v16, v8
-; RV32I-NEXT: addi a0, sp, 8
-; RV32I-NEXT: vlse64.v v16, (a0), zero
-; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: vlse64.v v24, (a0), zero
-; RV32I-NEXT: vsrl.vi v0, v8, 4
-; RV32I-NEXT: vadd.vv v8, v8, v0
 ; RV32I-NEXT: vand.vv v8, v8, v16
-; RV32I-NEXT: vmul.vv v8, v8, v24
+; RV32I-NEXT: vadd.vv v8, v24, v8
+; RV32I-NEXT: vsrl.vi v16, v8, 4
+; RV32I-NEXT: vadd.vv v8, v8, v16
+; RV32I-NEXT: lui a0, 61681
+; RV32I-NEXT: addi a0, a0, -241
+; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT: vmv.v.x v16, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT: vand.vv v8, v8, v16
+; RV32I-NEXT: lui a0, 4112
+; RV32I-NEXT: addi a0, a0, 257
+; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32I-NEXT: vmv.v.x v16, a0
+; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT: vmul.vv v8, v8, v16
 ; RV32I-NEXT: li a0, 56
 ; RV32I-NEXT: vsrl.vx v8, v8, a0
-; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: cttz_zero_undef_nxv8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -1991,39 +1991,23 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv1i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v10, v8, v9, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -2031,19 +2015,20 @@
 ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv1i64:
@@ -2096,39 +2081,23 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv1i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsub.vx v9, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v9
 ; RV32-NEXT: vsrl.vi v9, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10
 ; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v10, v8, v9
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -2136,19 +2105,20 @@
 ; RV32-NEXT: vadd.vv v8, v10, v8
 ; RV32-NEXT: vsrl.vi v9, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; RV32-NEXT: vlse64.v v9, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v9
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv1i64_unmasked:
@@ -2205,39 +2175,23 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv2i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
 ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v12, v8, v10, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -2245,19 +2199,20 @@
 ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv2i64:
@@ -2310,39 +2265,23 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv2i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsub.vx v10, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v10
 ; RV32-NEXT: vsrl.vi v10, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v10, v10, v12
 ; RV32-NEXT: vsub.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v12, v8, v10
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -2350,19 +2289,20 @@
 ; RV32-NEXT: vadd.vv v8, v12, v8
 ; RV32-NEXT: vsrl.vi v10, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v10
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv2i64_unmasked:
@@ -2419,39 +2359,23 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv4i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
 ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v16, v8, v12, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -2459,19 +2383,20 @@
 ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v12, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv4i64:
@@ -2524,39 +2449,23 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv4i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsub.vx v12, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v12
 ; RV32-NEXT: vsrl.vi v12, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v12, v12, v16
 ; RV32-NEXT: vsub.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v16, v8, v12
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -2564,19 +2473,20 @@
 ; RV32-NEXT: vadd.vv v8, v16, v8
 ; RV32-NEXT: vsrl.vi v12, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v12
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv4i64_unmasked:
@@ -2633,39 +2543,23 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv7i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v24, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -2673,19 +2567,20 @@
 ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv7i64:
@@ -2738,39 +2633,23 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv7i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsub.vx v16, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v16
 ; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v24, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v16, v16, v24
 ; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v24, v8, v16
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -2778,19 +2657,20 @@
 ; RV32-NEXT: vadd.vv v8, v24, v8
 ; RV32-NEXT: vsrl.vi v16, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v16
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv7i64_unmasked:
@@ -2847,39 +2727,23 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v24, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v24, v8, v16, v0.t
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
@@ -2887,19 +2751,20 @@
 ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv8i64:
@@ -2952,39 +2817,23 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv8i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: lui a1, 4112
-; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsub.vx v16, v8, a1
 ; RV32-NEXT: vnot.v v8, v8
 ; RV32-NEXT: vand.vv v8, v8, v16
 ; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: addi a1, sp, 24
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a1), zero
+; RV32-NEXT: lui a1, 349525
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v24, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v16, v16, v24
 ; RV32-NEXT: vsub.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 209715
+; RV32-NEXT: addi a1, a1, 819
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v24, v8, v16
 ; RV32-NEXT: vsrl.vi v8, v8, 2
@@ -2992,19 +2841,20 @@
 ; RV32-NEXT: vadd.vv v8, v24, v8
 ; RV32-NEXT: vsrl.vi v16, v8, 4
 ; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 61681
+; RV32-NEXT: addi a1, a1, -241
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: mv a1, sp
-; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: lui a1, 4112
+; RV32-NEXT: addi a1, a1, 257
+; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v16
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: addi sp, sp, 32
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv8i64_unmasked:
@@ -3061,189 +2911,252 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv16i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
+; RV32-NEXT: li a2, 56
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
-; RV32-NEXT: vmv1r.v v1, v0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; RV32-NEXT: vmv1r.v v24, v0
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: srli a2, a1, 3
 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
 ; RV32-NEXT: vslidedown.vx v0, v0, a2
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 44(sp)
-; RV32-NEXT: sw a2, 40(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 36(sp)
-; RV32-NEXT: sw a2, 32(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 28(sp)
-; RV32-NEXT: sw a2, 24(sp)
-; RV32-NEXT: lui a2, 4112
-; RV32-NEXT: addi a2, a2, 257
-; RV32-NEXT: sw a2, 20(sp)
-; RV32-NEXT: sw a2, 16(sp)
 ; RV32-NEXT: sub a2, a0, a1
 ; RV32-NEXT: sltu a3, a0, a2
 ; RV32-NEXT: addi a3, a3, -1
 ; RV32-NEXT: and a3, a3, a2
 ; RV32-NEXT: li a2, 1
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v24, v16, a2, v0.t
+; RV32-NEXT: vsub.vx v8, v16, a2, v0.t
 ; RV32-NEXT: vnot.v v16, v16, v0.t
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
+; RV32-NEXT: vand.vv v8, v16, v8, v0.t
 ; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: li a5, 40
+; RV32-NEXT: mul a4, a4, a5
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
 ; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
+; RV32-NEXT: li a5, 48
+; RV32-NEXT: mul a4, a4, a5
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, sp, 40
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: lui a4, 349525
+; RV32-NEXT: addi a4, a4, 1365
+; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v8, a4
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: csrr a4, vlenb
 ; RV32-NEXT: li a5, 24
 ; RV32-NEXT: mul a4, a4, a5
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
+; RV32-NEXT: li a5, 48
+; RV32-NEXT: mul a4, a4, a5
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v8, v16, v8, v0.t
 ; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: li a5, 40
+; RV32-NEXT: mul a4, a4, a5
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
+; RV32-NEXT: addi a4, a4, 16
 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
+; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
 ; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
+; RV32-NEXT: li a5, 40
+; RV32-NEXT: mul a4, a4, a5
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: addi a4, sp, 32
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: lui a4, 209715
+; RV32-NEXT: addi a4, a4, 819
+; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v8, a4
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
+; RV32-NEXT: li a5, 40
+; RV32-NEXT: mul a4, a4, a5
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT: csrr a4, vlenb
 ; RV32-NEXT: slli a4, a4, 4
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
+; RV32-NEXT: li a5, 40
+; RV32-NEXT: mul a4, a4, a5
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
 ; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
+; RV32-NEXT: li a5, 48
+; RV32-NEXT: mul a4, a4, a5
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT: csrr a4, vlenb
 ; RV32-NEXT: slli a4, a4, 4
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; RV32-NEXT: vadd.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t
-; RV32-NEXT: vadd.vv v24, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 24
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
+; RV32-NEXT: lui a4, 61681
+; RV32-NEXT: addi a4, a4, -241
+; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v8, a4
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: csrr a4, vlenb
 ; RV32-NEXT: slli a4, a4, 4
 ; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v16, v0.t
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a4), zero
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v16, v16, v8, v0.t
+; RV32-NEXT: lui a4, 4112
+; RV32-NEXT: addi a4, a4, 257
+; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v8, a4
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: csrr a3, vlenb
 ; RV32-NEXT: slli a3, a3, 3
 ; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 48
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vmul.vv v16, v24, v16, v0.t
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vmul.vv v8, v16, v8, v0.t
 ; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t
-; RV32-NEXT: addi a4, sp, 48
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t
+; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT: bltu a0, a1, .LBB46_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a0, a1
 ; RV32-NEXT: .LBB46_2:
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vmv1r.v v0, v1
-; RV32-NEXT: vsub.vx v16, v8, a2, v0.t
-; RV32-NEXT: vnot.v v8, v8, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
+; RV32-NEXT: vmv1r.v v0, v24
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vsub.vx v8, v16, a2, v0.t
+; RV32-NEXT: vnot.v v16, v16, v0.t
+; RV32-NEXT: vand.vv v8, v16, v8, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: li a1, 24
 ; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v8, v16, v0.t
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 5
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: vand.vv v16, v16, v8, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 4
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 3
 ; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT: addi a0, sp, 48
+; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
+; RV32-NEXT: li a1, 56
 ; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
-; RV32-NEXT: addi sp, sp, 48
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_nxv16i64:
@@ -3366,28 +3279,18 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv16i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -48
-; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT: lui a1, 349525
-; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
-; RV32-NEXT: sw a1, 40(sp)
-; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
-; RV32-NEXT: sw a1, 32(sp)
-; RV32-NEXT: lui a1, 61681
-; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT:
lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 @@ -3395,98 +3298,108 @@ ; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v24, v16, a2 +; RV32-NEXT: vsub.vx v8, v16, a2 ; RV32-NEXT: vnot.v v16, v16 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: addi a4, sp, 40 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a4), zero +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: lui a4, 349525 +; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vsub.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 32 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a4), zero +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: lui a4, 209715 +; RV32-NEXT: addi a4, a4, 819 +; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 24 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a4), zero +; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 +; RV32-NEXT: lui a4, 61681 +; RV32-NEXT: addi a4, a4, -241 +; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v16, v24 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: lui a4, 4112 +; RV32-NEXT: addi a4, a4, 257 +; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v24, v24, v16 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vmul.vv v16, v16, 
v8 ; RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v16, v24, a3 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v8, v16, a3 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB47_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: .LBB47_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v24, v8, a2 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vx v16, v24, a2 +; RV32-NEXT: vnot.v v24, v24 +; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v16, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vsub.vv v16, v16, v24 +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v16, v16, v0 +; RV32-NEXT: vadd.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v16, 4 +; RV32-NEXT: vadd.vv v16, v16, v24 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8 ; RV32-NEXT: vsrl.vx v8, v8, a3 -; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv16i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll @@ -6,28 +6,16 @@ define i32 @splat_vector_split_i64() { ; CHECK-LABEL: splat_vector_split_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: li a0, 3 ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 -; CHECK-NEXT: sw zero, 4(sp) 
+; CHECK-NEXT: sw zero, 12(sp) ; CHECK-NEXT: lui a0, 1044480 -; CHECK-NEXT: sw a0, 0(sp) -; CHECK-NEXT: lui a0, 61681 -; CHECK-NEXT: addi a0, a0, -241 -; CHECK-NEXT: sw a0, 28(sp) -; CHECK-NEXT: sw a0, 24(sp) -; CHECK-NEXT: lui a0, 209715 -; CHECK-NEXT: addi a0, a0, 819 -; CHECK-NEXT: sw a0, 20(sp) -; CHECK-NEXT: sw a0, 16(sp) -; CHECK-NEXT: lui a0, 349525 -; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: sw a0, 12(sp) ; CHECK-NEXT: sw a0, 8(sp) ; CHECK-NEXT: li a0, 56 ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma @@ -39,7 +27,7 @@ ; CHECK-NEXT: vand.vx v12, v12, a2 ; CHECK-NEXT: vor.vv v10, v12, v10 ; CHECK-NEXT: vsrl.vi v12, v8, 24 -; CHECK-NEXT: mv a3, sp +; CHECK-NEXT: addi a3, sp, 8 ; CHECK-NEXT: vlse64.v v14, (a3), zero ; CHECK-NEXT: lui a3, 4080 ; CHECK-NEXT: vand.vx v12, v12, a3 @@ -56,25 +44,34 @@ ; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vsll.vx v8, v8, a1 ; CHECK-NEXT: vor.vv v8, v14, v8 -; CHECK-NEXT: addi a0, sp, 24 -; CHECK-NEXT: vlse64.v v14, (a0), zero ; CHECK-NEXT: vor.vv v8, v8, v12 ; CHECK-NEXT: vor.vv v8, v8, v10 ; CHECK-NEXT: vsrl.vi v10, v8, 4 -; CHECK-NEXT: vand.vv v10, v10, v14 -; CHECK-NEXT: vand.vv v8, v8, v14 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vlse64.v v12, (a0), zero +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vand.vv v10, v10, v12 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vsll.vi v8, v8, 4 ; CHECK-NEXT: vor.vv v8, v10, v8 ; CHECK-NEXT: vsrl.vi v10, v8, 2 +; CHECK-NEXT: lui a0, 209715 +; CHECK-NEXT: addi a0, a0, 819 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vand.vv v10, v10, v12 ; CHECK-NEXT: vand.vv v8, v8, v12 -; CHECK-NEXT: addi a0, sp, 8 -; CHECK-NEXT: vlse64.v v12, (a0), zero ; CHECK-NEXT: vsll.vi v8, v8, 2 ; CHECK-NEXT: vor.vv v8, v10, v8 ; CHECK-NEXT: vsrl.vi v10, v8, 1 +; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vand.vv v10, v10, v12 ; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vadd.vv v8, v8, v8 @@ -82,7 +79,7 @@ ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 3 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = insertelement zeroinitializer, i64 3, i64 3 %2 = tail call @llvm.bitreverse.nxv2i64( %1)