diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -98,13 +98,12 @@ MF.getInfo<RISCVMachineFunctionInfo>(); SDLoc DL(N); - // We use the same frame index we use for moving two i32s into 64-bit FPR. - // This is an analogous operation. - int FI = FuncInfo->getMoveF64FrameIndex(MF); + // Create a temporary stack slot for each node being expanded. + SDValue StackSlot = + CurDAG->CreateStackTemporary(TypeSize::Fixed(8), Align(4)); + int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); - SDValue StackSlot = - CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); SDValue Chain = CurDAG->getEntryNode(); Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -967,19 +967,19 @@ define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) { ; RV32-LABEL: bitreverse_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: lui a0, 1044480 -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 ; RV32-NEXT: sw a0, 12(sp) @@ -993,10 +993,10 @@ ; RV32-NEXT: li a2, 40 ; RV32-NEXT: vsll.vx v10, v10, a2 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: mv a3, sp ; RV32-NEXT: vlse64.v v10, (a3), zero -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v11, v8, a3 ; RV32-NEXT: vsll.vi v11, v11, 24 ; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsll.vi v12, v12, 8 @@ -1007,23 +1007,26 @@ ; RV32-NEXT: vand.vx v12, v12, a1 ; RV32-NEXT: vor.vv v11, v12, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vsrl.vi v8, v8, 8 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: addi a0, sp, 24 +; RV32-NEXT: vlse64.v v10, (a0), zero ; RV32-NEXT: vor.vv v8, v8, v11 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vlse64.v v10, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 2 ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 @@ -1031,7 +1034,7 @@ ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: addi sp, sp, 
16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv1i64: @@ -1101,19 +1104,19 @@ define <vscale x 2 x i64> @bitreverse_nxv2i64(<vscale x 2 x i64> %va) { ; RV32-LABEL: bitreverse_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: lui a0, 1044480 -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 ; RV32-NEXT: sw a0, 12(sp) @@ -1127,10 +1130,10 @@ ; RV32-NEXT: li a2, 40 ; RV32-NEXT: vsll.vx v12, v12, a2 ; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: mv a3, sp ; RV32-NEXT: vlse64.v v12, (a3), zero -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v14, v8, a3 ; RV32-NEXT: vsll.vi v14, v14, 24 ; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsll.vi v16, v16, 8 @@ -1141,23 +1144,26 @@ ; RV32-NEXT: vand.vx v16, v16, a1 ; RV32-NEXT: vor.vv v14, v16, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vsrl.vi v8, v8, 8 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v12, (a3), zero +; RV32-NEXT: addi a0, sp, 24 +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vor.vv v8, v8, v14 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vlse64.v v12, (a3), zero +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 2 ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vlse64.v v12, (a3), zero +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 @@ -1165,7 +1171,7 @@ ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv2i64: @@ -1235,19 +1241,19 @@ define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> %va) { ; RV32-LABEL: bitreverse_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: lui a0, 1044480 -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 ; RV32-NEXT: sw a0, 12(sp) @@ -1261,10 +1267,10 @@ ; RV32-NEXT: li a2, 40 ; RV32-NEXT: vsll.vx v16, v16, a2 ; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: addi a3, sp, 8 +; 
RV32-NEXT: mv a3, sp ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v20, v8, a3 ; RV32-NEXT: vsll.vi v20, v20, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 @@ -1275,23 +1281,26 @@ ; RV32-NEXT: vand.vx v24, v24, a1 ; RV32-NEXT: vor.vv v20, v24, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vand.vx v24, v24, a4 +; RV32-NEXT: vand.vx v24, v24, a3 ; RV32-NEXT: vsrl.vi v8, v8, 8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a0, sp, 24 +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vor.vv v8, v8, v20 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 2 ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 @@ -1299,7 +1308,7 @@ ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv4i64: @@ -1369,27 +1378,27 @@ define <vscale x 8 x i64> @bitreverse_nxv8i64(<vscale x 8 x i64> %va) { ; RV32-LABEL: bitreverse_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a0, 1044480 -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 44(sp) +; RV32-NEXT: sw a0, 40(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 36(sp) +; RV32-NEXT: sw a0, 32(sp) ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a0 @@ -1399,20 +1408,20 @@ ; RV32-NEXT: li a2, 40 ; RV32-NEXT: vsll.vx v24, v24, a2 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsll.vi v16, v16, 8 ; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vl8r.v v0, (a5) # 
Unknown-size Folded Reload +; RV32-NEXT: addi a4, sp, 48 +; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v0, v8, a2 ; RV32-NEXT: vand.vx v0, v0, a1 ; RV32-NEXT: vsrl.vx v16, v8, a0 @@ -1420,23 +1429,26 @@ ; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vand.vx v8, v8, a3 ; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a0, sp, 40 +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v24, v8, 4 ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v24, v8, 2 ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a0, sp, 24 +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v24, v8, 1 @@ -1447,7 +1459,7 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -1969,55 +1969,56 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v9, v8, a2, v0.t -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a3, a1, -256 -; RV32-NEXT: vand.vx v10, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsll.vx v10, v10, a4, v0.t +; RV32-NEXT: vsll.vx v9, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3, v0.t ; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: vand.vx v10, v8, a5, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v10, v8, a4, v0.t ; RV32-NEXT: vsll.vi v10, v10, 24, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; 
RV32-NEXT: vlse64.v v11, (a1), zero +; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v12, v8, v11, v0.t ; RV32-NEXT: vsll.vi v12, v12, 8, v0.t ; RV32-NEXT: vor.vv v10, v10, v12, v0.t ; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a4, v0.t -; RV32-NEXT: vand.vx v12, v12, a3, v0.t +; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t +; RV32-NEXT: vand.vx v12, v12, a2, v0.t ; RV32-NEXT: vor.vv v10, v12, v10, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV32-NEXT: vand.vx v12, v12, a5, v0.t +; RV32-NEXT: vand.vx v12, v12, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: vand.vv v8, v8, v11, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2026,6 +2027,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2034,6 +2036,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2041,7 +2044,7 @@ ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv1i64: @@ -2110,19 +2113,19 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 12(sp) @@ -2139,7 +2142,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v8, a4 ; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2159,30 +2162,33 @@ ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a5), zero +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, 
v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a5), zero +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a5), zero +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv1i64_unmasked: @@ -2255,55 +2261,56 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v10, v8, a2, v0.t -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a3, a1, -256 -; RV32-NEXT: vand.vx v12, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsll.vx v12, v12, a4, v0.t +; RV32-NEXT: vsll.vx v10, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3, v0.t ; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: vand.vx v12, v8, a5, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v12, v8, a4, v0.t ; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a1), zero +; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v16, v8, v14, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: vor.vv v12, v12, v16, v0.t ; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a4, v0.t -; RV32-NEXT: vand.vx v16, v16, a3, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v16, a2, v0.t ; RV32-NEXT: vor.vv v12, v16, v12, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV32-NEXT: vand.vx v16, v16, a5, v0.t +; RV32-NEXT: vand.vx v16, v16, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: vand.vv v8, v8, v14, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t +; 
RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2312,6 +2319,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2320,6 +2328,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2327,7 +2336,7 @@ ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv2i64: @@ -2396,19 +2405,19 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 12(sp) @@ -2425,7 +2434,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v8, a4 ; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2445,30 +2454,33 @@ ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; 
RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv2i64_unmasked: @@ -2541,55 +2553,56 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v12, v8, a2, v0.t -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a3, a1, -256 -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsll.vx v16, v16, a4, v0.t +; RV32-NEXT: vsll.vx v12, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3, v0.t ; RV32-NEXT: vor.vv v16, v12, v16, v0.t -; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: vand.vx v12, v8, a5, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v12, v8, a4, v0.t ; RV32-NEXT: vsll.vi v20, v12, 24, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vlse64.v v12, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v24, v8, v12, v0.t ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t ; RV32-NEXT: vor.vv v20, v20, v24, v0.t ; RV32-NEXT: vor.vv v16, v16, v20, v0.t -; RV32-NEXT: vsrl.vx v20, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vsrl.vx v20, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t ; RV32-NEXT: vor.vv v20, v24, v20, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: vor.vv v8, v8, v20, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2598,6 +2611,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2606,6 +2620,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2613,7 +2628,7 @@ ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; 
RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv4i64: @@ -2682,19 +2697,19 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw zero, 4(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 12(sp) @@ -2711,7 +2726,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4 ; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2731,30 +2746,33 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv4i64_unmasked: @@ -2827,98 +2845,99 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv7i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; 
RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: li a2, 56 +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a3, a1, -256 -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: vand.vx v16, v8, a5, v0.t +; RV32-NEXT: addi a4, sp, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v16, v8, a4, v0.t ; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 3 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 4 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 4 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a4, v0.t -; RV32-NEXT: vand.vx v16, v16, a3, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t +; 
RV32-NEXT: vand.vx v16, v16, a2, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2927,6 +2946,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2935,6 +2955,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2946,7 +2967,7 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv7i64: @@ -3032,27 +3053,27 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv7i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw 
a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -3062,22 +3083,22 @@ ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v24, v24, a3 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4 ; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vl8r.v v0, (a6) # Unknown-size Folded Reload +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v0, v8, a3 ; RV32-NEXT: vand.vx v0, v0, a2 ; RV32-NEXT: vsrl.vx v24, v8, a1 @@ -3088,28 +3109,31 @@ ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 @@ -3118,7 +3142,7 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv7i64_unmasked: @@ -3191,98 +3215,99 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb 
-; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: li a2, 56 +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a3, a1, -256 -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: vand.vx v16, v8, a5, v0.t +; RV32-NEXT: addi a4, sp, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v16, v8, a4, v0.t ; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 3 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 4 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 4 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v16, 
v8, a4, v0.t -; RV32-NEXT: vand.vx v16, v16, a3, v0.t +; RV32-NEXT: vand.vx v16, v16, a2, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -3291,6 +3316,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -3299,6 +3325,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -3310,7 +3337,7 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv8i64: @@ -3396,27 +3423,27 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: 
sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -3426,22 +3453,22 @@ ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v24, v24, a3 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4 ; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vl8r.v v0, (a6) # Unknown-size Folded Reload +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v0, v8, a3 ; RV32-NEXT: vand.vx v0, v0, a2 ; RV32-NEXT: vsrl.vx v24, v8, a1 @@ -3452,28 +3479,31 @@ ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 @@ -3482,7 +3512,7 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv8i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ 
-1417,24 +1417,24 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) { ; RV32I-LABEL: ctlz_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32I-NEXT: vsrl.vi v9, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v9 @@ -1449,9 +1449,10 @@ ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: vsrl.vx v9, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v9, (a0), zero ; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vsrl.vi v11, v8, 1 ; RV32I-NEXT: vand.vv v9, v11, v9 @@ -1460,7 +1461,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v10 ; RV32I-NEXT: vadd.vv v8, v9, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vsrl.vi v11, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v11 @@ -1468,7 +1471,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v10 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_nxv1i64: @@ -1557,24 +1560,24 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) { ; RV32I-LABEL: ctlz_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; RV32I-NEXT: vsrl.vi v10, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v10 @@ -1589,9 +1592,10 @@ ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: vsrl.vx v10, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vsrl.vi v14, v8, 1 ; RV32I-NEXT: vand.vv v10, v14, v10 @@ -1600,7 +1604,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v12 ; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v10, (a0), zero +; 
RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vsrl.vi v14, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v14 @@ -1608,7 +1614,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v12 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_nxv2i64: @@ -1697,24 +1703,24 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) { ; RV32I-LABEL: ctlz_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32I-NEXT: vsrl.vi v12, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v12 @@ -1729,9 +1735,10 @@ ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: vsrl.vx v12, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vsrl.vi v20, v8, 1 ; RV32I-NEXT: vand.vv v12, v20, v12 @@ -1740,7 +1747,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v16 ; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vsrl.vi v20, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v20 @@ -1748,7 +1757,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v16 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_nxv4i64: @@ -1837,24 +1846,24 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) { ; RV32I-LABEL: ctlz_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32I-NEXT: vsrl.vi v16, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v16 @@ -1869,9 +1878,10 @@ ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: vsrl.vx v16, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v16 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 24 +; 
RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 1 ; RV32I-NEXT: vand.vv v16, v0, v16 @@ -1880,7 +1890,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v24 ; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v0 @@ -1888,7 +1900,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v24 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_nxv8i64: @@ -3307,24 +3319,24 @@ define @ctlz_zero_undef_nxv1i64( %va) { ; RV32I-LABEL: ctlz_zero_undef_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32I-NEXT: vsrl.vi v9, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v9 @@ -3339,9 +3351,10 @@ ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: vsrl.vx v9, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v9 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v9, (a0), zero ; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vsrl.vi v11, v8, 1 ; RV32I-NEXT: vand.vv v9, v11, v9 @@ -3350,7 +3363,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v10 ; RV32I-NEXT: vadd.vv v8, v9, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vsrl.vi v11, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v11 @@ -3358,7 +3373,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v10 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_zero_undef_nxv1i64: @@ -3442,24 +3457,24 @@ define @ctlz_zero_undef_nxv2i64( %va) { ; RV32I-LABEL: ctlz_zero_undef_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; 
RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; RV32I-NEXT: vsrl.vi v10, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v10 @@ -3474,9 +3489,10 @@ ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: vsrl.vx v10, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v10 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vsrl.vi v14, v8, 1 ; RV32I-NEXT: vand.vv v10, v14, v10 @@ -3485,7 +3501,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v12 ; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vsrl.vi v14, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v14 @@ -3493,7 +3511,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v12 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_zero_undef_nxv2i64: @@ -3577,24 +3595,24 @@ define @ctlz_zero_undef_nxv4i64( %va) { ; RV32I-LABEL: ctlz_zero_undef_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32I-NEXT: vsrl.vi v12, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v12 @@ -3609,9 +3627,10 @@ ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: vsrl.vx v12, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v12 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vnot.v v8, v8 +; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vsrl.vi v20, v8, 1 ; RV32I-NEXT: vand.vv v12, v20, v12 @@ -3620,7 +3639,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v16 ; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vsrl.vi v20, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v20 @@ -3628,7 +3649,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v16 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_zero_undef_nxv4i64: @@ -3712,24 +3733,24 @@ define @ctlz_zero_undef_nxv8i64( %va) { ; RV32I-LABEL: ctlz_zero_undef_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; 
RV32I-NEXT: sw a0, 24(sp)
 ; RV32I-NEXT: lui a0, 209715
 ; RV32I-NEXT: addi a0, a0, 819
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: sw a0, 20(sp)
+; RV32I-NEXT: sw a0, 16(sp)
 ; RV32I-NEXT: lui a0, 61681
 ; RV32I-NEXT: addi a0, a0, -241
 ; RV32I-NEXT: sw a0, 12(sp)
 ; RV32I-NEXT: sw a0, 8(sp)
 ; RV32I-NEXT: lui a0, 4112
 ; RV32I-NEXT: addi a0, a0, 257
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a0, 8(sp)
+; RV32I-NEXT: sw a0, 4(sp)
+; RV32I-NEXT: sw a0, 0(sp)
 ; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma
 ; RV32I-NEXT: vsrl.vi v16, v8, 1
 ; RV32I-NEXT: vor.vv v8, v8, v16
@@ -3744,9 +3765,10 @@
 ; RV32I-NEXT: li a0, 32
 ; RV32I-NEXT: vsrl.vx v16, v8, a0
 ; RV32I-NEXT: vor.vv v8, v8, v16
-; RV32I-NEXT: addi a0, sp, 8
-; RV32I-NEXT: vlse64.v v16, (a0), zero
 ; RV32I-NEXT: vnot.v v8, v8
+; RV32I-NEXT: addi a0, sp, 24
+; RV32I-NEXT: vlse64.v v16, (a0), zero
+; RV32I-NEXT: addi a0, sp, 16
 ; RV32I-NEXT: vlse64.v v24, (a0), zero
 ; RV32I-NEXT: vsrl.vi v0, v8, 1
 ; RV32I-NEXT: vand.vv v16, v0, v16
@@ -3755,7 +3777,9 @@
 ; RV32I-NEXT: vsrl.vi v8, v8, 2
 ; RV32I-NEXT: vand.vv v8, v8, v24
 ; RV32I-NEXT: vadd.vv v8, v16, v8
+; RV32I-NEXT: addi a0, sp, 8
 ; RV32I-NEXT: vlse64.v v16, (a0), zero
+; RV32I-NEXT: mv a0, sp
 ; RV32I-NEXT: vlse64.v v24, (a0), zero
 ; RV32I-NEXT: vsrl.vi v0, v8, 4
 ; RV32I-NEXT: vadd.vv v8, v8, v0
@@ -3763,7 +3787,7 @@
 ; RV32I-NEXT: vmul.vv v8, v8, v24
 ; RV32I-NEXT: li a0, 56
 ; RV32I-NEXT: vsrl.vx v8, v8, a0
-; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: addi sp, sp, 32
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: ctlz_zero_undef_nxv8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -2073,24 +2073,24 @@
 define <vscale x 1 x i64> @vp_ctlz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_nxv1i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
 ; RV32-NEXT: lui a1, 349525
 ; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a1, 24(sp)
 ; RV32-NEXT: lui a1, 209715
 ; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a1, 16(sp)
 ; RV32-NEXT: lui a1, 61681
 ; RV32-NEXT: addi a1, a1, -241
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a1, 8(sp)
 ; RV32-NEXT: lui a1, 4112
 ; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: sw a1, 0(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
 ; RV32-NEXT: vor.vv v8, v8, v9, v0.t
@@ -2107,12 +2107,13 @@
 ; RV32-NEXT: vor.vv v8, v8, v9, v0.t
 ; RV32-NEXT: vnot.v v8, v8, v0.t
 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: addi a1, sp, 24
 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi a1, sp, 16
 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
@@ -2122,17 +2123,19 @@
 ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi a1, sp, 8
 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v
v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv1i64: @@ -2189,24 +2192,24 @@ define @vp_ctlz_nxv1i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -2223,12 +2226,13 @@ ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -2238,17 +2242,19 @@ ; RV32-NEXT: vadd.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv1i64_unmasked: @@ -2309,24 +2315,24 @@ define @vp_ctlz_nxv2i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; 
RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t @@ -2343,12 +2349,13 @@ ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2358,17 +2365,19 @@ ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv2i64: @@ -2425,24 +2434,24 @@ define @vp_ctlz_nxv2i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v10 @@ -2459,12 +2468,13 @@ ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsub.vv v8, v8, v10 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2474,17 +2484,19 @@ ; RV32-NEXT: vadd.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; 
RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv2i64_unmasked: @@ -2545,24 +2557,24 @@ define @vp_ctlz_nxv4i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t @@ -2579,12 +2591,13 @@ ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2594,17 +2607,19 @@ ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv4i64: @@ -2661,24 +2676,24 @@ define @vp_ctlz_nxv4i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v12 @@ -2695,12 +2710,13 @@ ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; 
RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsub.vv v8, v8, v12 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2710,17 +2726,19 @@ ; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v12 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv4i64_unmasked: @@ -2781,24 +2799,24 @@ define @vp_ctlz_nxv7i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv7i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -2815,12 +2833,13 @@ ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2830,17 +2849,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv7i64: @@ -2897,24 +2918,24 @@ define @vp_ctlz_nxv7i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv7i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: 
lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -2931,12 +2952,13 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2946,17 +2968,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv7i64_unmasked: @@ -3017,24 +3041,24 @@ define @vp_ctlz_nxv8i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -3051,12 +3075,13 @@ ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -3066,17 +3091,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, 
v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv8i64: @@ -3133,24 +3160,24 @@ define @vp_ctlz_nxv8i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -3167,12 +3194,13 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -3182,17 +3210,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv8i64_unmasked: @@ -3253,13 +3283,13 @@ define @vp_ctlz_nxv16i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: 
csrr a1, vlenb ; RV32-NEXT: srli a2, a1, 3 @@ -3267,20 +3297,20 @@ ; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 44(sp) +; RV32-NEXT: sw a2, 40(sp) ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: sw a2, 32(sp) ; RV32-NEXT: lui a2, 61681 ; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 28(sp) +; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 ; RV32-NEXT: addi a3, a3, -1 @@ -3303,111 +3333,114 @@ ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: addi a4, sp, 40 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 32 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; 
RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: addi a4, sp, 24 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, 
a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vmul.vv v16, v24, v16, v0.t ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB46_2 ; RV32-NEXT: # %bb.1: @@ -3432,14 +3465,14 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -3451,23 +3484,23 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv16i64: @@ -3588,28 +3621,28 @@ define @vp_ctlz_nxv16i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_nxv16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 @@ -3631,18 +3664,19 @@ ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: vnot.v v16, v16 ; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: addi a4, sp, 40 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a4), 
zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v0, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v0 ; RV32-NEXT: vsub.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 32 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -3652,27 +3686,29 @@ ; RV32-NEXT: vadd.vv v16, v24, v16 ; RV32-NEXT: vsrl.vi v24, v16, 4 ; RV32-NEXT: vadd.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 24 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vmul.vv v24, v24, v16 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsrl.vx v16, v24, a3 -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB47_2 ; RV32-NEXT: # %bb.1: @@ -3697,7 +3733,7 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vsub.vv v8, v8, v24 @@ -3710,22 +3746,22 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vsrl.vx v8, v8, a3 -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_nxv16i64_unmasked: @@ -5865,24 +5901,24 @@ define @vp_ctlz_zero_undef_nxv1i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; 
RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v9, v0.t @@ -5899,12 +5935,13 @@ ; RV32-NEXT: vor.vv v8, v8, v9, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -5914,17 +5951,19 @@ ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv1i64: @@ -5981,24 +6020,24 @@ define @vp_ctlz_zero_undef_nxv1i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -6015,12 +6054,13 @@ ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -6030,17 +6070,19 @@ ; RV32-NEXT: vadd.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: addi 
a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv1i64_unmasked: @@ -6100,24 +6142,24 @@ define @vp_ctlz_zero_undef_nxv2i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t @@ -6134,12 +6176,13 @@ ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -6149,17 +6192,19 @@ ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv2i64: @@ -6216,24 +6261,24 @@ define @vp_ctlz_zero_undef_nxv2i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; 
RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v10 @@ -6250,12 +6295,13 @@ ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsub.vv v8, v8, v10 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -6265,17 +6311,19 @@ ; RV32-NEXT: vadd.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv2i64_unmasked: @@ -6335,24 +6383,24 @@ define @vp_ctlz_zero_undef_nxv4i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t @@ -6369,12 +6417,13 @@ ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -6384,17 +6433,19 @@ ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; 
RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv4i64: @@ -6451,24 +6502,24 @@ define @vp_ctlz_zero_undef_nxv4i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v12 @@ -6485,12 +6536,13 @@ ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsub.vv v8, v8, v12 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -6500,17 +6552,19 @@ ; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v12 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv4i64_unmasked: @@ -6570,24 +6624,24 @@ define @vp_ctlz_zero_undef_nxv7i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv7i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: 
vor.vv v8, v8, v16, v0.t @@ -6604,12 +6658,13 @@ ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -6619,17 +6674,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv7i64: @@ -6686,24 +6743,24 @@ define @vp_ctlz_zero_undef_nxv7i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv7i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -6720,12 +6777,13 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -6735,17 +6793,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: 
vp_ctlz_zero_undef_nxv7i64_unmasked: @@ -6805,24 +6865,24 @@ define @vp_ctlz_zero_undef_nxv8i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -6839,12 +6899,13 @@ ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -6854,17 +6915,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv8i64: @@ -6921,24 +6984,24 @@ define @vp_ctlz_zero_undef_nxv8i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -6955,12 +7018,13 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: 
vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -6970,17 +7034,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv8i64_unmasked: @@ -7039,13 +7105,13 @@ define @vp_ctlz_zero_undef_nxv16i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a2, a1, 3 @@ -7053,20 +7119,20 @@ ; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 44(sp) +; RV32-NEXT: sw a2, 40(sp) ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: sw a2, 32(sp) ; RV32-NEXT: lui a2, 61681 ; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 28(sp) +; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 ; RV32-NEXT: addi a3, a3, -1 @@ -7089,111 +7155,114 @@ ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: addi a4, sp, 40 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 
-; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 32 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v 
v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: addi a4, sp, 24 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vmul.vv v16, v24, v16, v0.t ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB94_2 ; RV32-NEXT: # %bb.1: @@ -7218,14 +7287,14 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -7237,23 +7306,23 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, 
sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv16i64: @@ -7374,28 +7443,28 @@ define @vp_ctlz_zero_undef_nxv16i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 @@ -7417,18 +7486,19 @@ ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: vnot.v v16, v16 ; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: addi a4, sp, 40 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v0, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v0 ; RV32-NEXT: vsub.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 32 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -7438,27 +7508,29 @@ ; RV32-NEXT: vadd.vv v16, v24, v16 ; RV32-NEXT: vsrl.vi v24, v16, 4 ; RV32-NEXT: vadd.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 24 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vmul.vv v24, v24, v16 ; RV32-NEXT: li a3, 56 
; RV32-NEXT: vsrl.vx v16, v24, a3 -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB95_2 ; RV32-NEXT: # %bb.1: @@ -7483,7 +7555,7 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vsub.vv v8, v8, v24 @@ -7496,22 +7568,22 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vsrl.vx v8, v8, a3 -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_nxv16i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll @@ -752,27 +752,28 @@ define @ctpop_nxv1i64( %va) { ; RV32-LABEL: ctpop_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: lui a0, 4112 ; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a0, sp, 24 ; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vlse64.v v10, (a0), zero ; RV32-NEXT: vsrl.vi v11, v8, 1 ; RV32-NEXT: vand.vv v9, v11, v9 @@ -781,7 +782,9 @@ ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vadd.vv v8, v9, v8 +; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: mv a0, sp ; RV32-NEXT: vlse64.v v10, (a0), zero ; RV32-NEXT: vsrl.vi v11, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v11 @@ -789,7 +792,7 @@ ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: ctpop_nxv1i64: @@ -833,27 +836,28 @@ define @ctpop_nxv2i64( %va) { ; RV32-LABEL: ctpop_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 
8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: lui a0, 4112 ; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a0, sp, 24 ; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsrl.vi v14, v8, 1 ; RV32-NEXT: vand.vv v10, v14, v10 @@ -862,7 +866,9 @@ ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vadd.vv v8, v10, v8 +; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: mv a0, sp ; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsrl.vi v14, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v14 @@ -870,7 +876,7 @@ ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: ctpop_nxv2i64: @@ -914,27 +920,28 @@ define @ctpop_nxv4i64( %va) { ; RV32-LABEL: ctpop_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: lui a0, 4112 ; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a0, sp, 24 ; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsrl.vi v20, v8, 1 ; RV32-NEXT: vand.vv v12, v20, v12 @@ -943,7 +950,9 @@ ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v12, v8 +; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: mv a0, sp ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsrl.vi v20, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v20 @@ -951,7 +960,7 @@ ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: ctpop_nxv4i64: @@ -995,27 +1004,28 @@ define @ctpop_nxv8i64( %va) { ; RV32-LABEL: ctpop_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: 
lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: lui a0, 4112 ; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a0, sp, 24 ; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: vand.vv v16, v0, v16 @@ -1024,7 +1034,9 @@ ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: mv a0, sp ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vsrl.vi v0, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v0 @@ -1032,7 +1044,7 @@ ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: ctpop_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -1539,32 +1539,33 @@ define @vp_ctpop_nxv1i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1574,17 +1575,19 @@ ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv1i64: @@ -1627,32 +1630,33 @@ define @vp_ctpop_nxv1i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: 
.cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1662,17 +1666,19 @@ ; RV32-NEXT: vadd.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv1i64_unmasked: @@ -1719,32 +1725,33 @@ define @vp_ctpop_nxv2i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -1754,17 +1761,19 @@ ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, 
a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv2i64: @@ -1807,32 +1816,33 @@ define @vp_ctpop_nxv2i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsub.vv v8, v8, v10 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -1842,17 +1852,19 @@ ; RV32-NEXT: vadd.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv2i64_unmasked: @@ -1899,32 +1911,33 @@ define @vp_ctpop_nxv4i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, 
e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -1934,17 +1947,19 @@ ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv4i64: @@ -1987,32 +2002,33 @@ define @vp_ctpop_nxv4i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsub.vv v8, v8, v12 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2022,17 +2038,19 @@ ; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v12 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv4i64_unmasked: @@ -2079,32 +2097,33 @@ define @vp_ctpop_nxv7i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv7i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 
24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2114,17 +2133,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv7i64: @@ -2167,32 +2188,33 @@ define @vp_ctpop_nxv7i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv7i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2202,17 +2224,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: 
vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv7i64_unmasked: @@ -2259,32 +2283,33 @@ define @vp_ctpop_nxv8i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2294,17 +2319,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv8i64: @@ -2347,32 +2374,33 @@ define @vp_ctpop_nxv8i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, 
sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2382,17 +2410,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv8i64_unmasked: @@ -2439,19 +2469,19 @@ define @vp_ctpop_nxv16i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a2, a1, 3 @@ -2459,107 +2489,122 @@ ; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 44(sp) +; RV32-NEXT: sw a2, 40(sp) ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: sw a2, 32(sp) ; RV32-NEXT: lui a2, 61681 ; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 28(sp) +; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 40 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; 
RV32-NEXT: vsub.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 40 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 40 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 40 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 40 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: 
vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vmul.vv v16, v24, v16, v0.t ; RV32-NEXT: li a2, 56 @@ -2567,7 +2612,7 @@ ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB46_2 ; RV32-NEXT: # %bb.1: @@ -2575,13 +2620,13 @@ ; RV32-NEXT: .LBB46_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t @@ -2589,7 +2634,7 @@ ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -2601,26 +2646,26 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv16i64: @@ -2714,28 +2759,28 @@ define @vp_ctpop_nxv16i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_nxv16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: 
sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 @@ -2743,18 +2788,19 @@ ; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v0 ; RV32-NEXT: vsub.vv v16, v16, v24 +; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma @@ -2764,27 +2810,29 @@ ; RV32-NEXT: vadd.vv v16, v24, v16 ; RV32-NEXT: vsrl.vi v24, v16, 4 ; RV32-NEXT: vadd.vv v24, v16, v24 +; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v16, v16, a2 -; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB47_2 ; RV32-NEXT: # %bb.1: @@ -2796,7 +2844,7 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, 
v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 @@ -2809,22 +2857,22 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vsrl.vx v8, v8, a2 -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv16i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -1356,40 +1356,43 @@ define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) { ; RV32I-LABEL: cttz_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32I-NEXT: vsub.vx v9, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vsrl.vi v11, v8, 1 -; RV32I-NEXT: vand.vv v10, v11, v10 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: vand.vv v10, v8, v9 +; RV32I-NEXT: vand.vv v9, v11, v9 +; RV32I-NEXT: vsub.vv v8, v8, v9 +; RV32I-NEXT: vand.vv v9, v8, v10 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v9, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vsrl.vi v11, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v11 @@ -1397,7 +1400,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v10 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_nxv1i64: @@ -1521,40 +1524,43 @@ define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) { ; RV32I-LABEL: cttz_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT:
sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32I-NEXT: vsub.vx v10, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vsrl.vi v14, v8, 1 -; RV32I-NEXT: vand.vv v12, v14, v12 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: vand.vv v12, v8, v10 +; RV32I-NEXT: vand.vv v10, v14, v10 +; RV32I-NEXT: vsub.vv v8, v8, v10 +; RV32I-NEXT: vand.vv v10, v8, v12 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vsrl.vi v14, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v14 @@ -1562,7 +1568,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v12 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_nxv2i64: @@ -1686,40 +1692,43 @@ define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) { ; RV32I-LABEL: cttz_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32I-NEXT: vsub.vx v12, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vsrl.vi v20, v8, 1 -; RV32I-NEXT: vand.vv v16, v20, v16 -; RV32I-NEXT: vsub.vv v8, v8, v16 -; RV32I-NEXT: vand.vv v16, v8, v12 +; RV32I-NEXT: vand.vv v12, v20, v12 +; RV32I-NEXT: vsub.vv v8, v8, v12 +; RV32I-NEXT: vand.vv v12, v8, v16 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT:
vsrl.vi v20, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v20 @@ -1727,7 +1736,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v16 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_nxv4i64: @@ -1851,40 +1860,43 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) { ; RV32I-LABEL: cttz_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32I-NEXT: vsub.vx v16, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 1 -; RV32I-NEXT: vand.vv v24, v0, v24 -; RV32I-NEXT: vsub.vv v8, v8, v24 -; RV32I-NEXT: vand.vv v24, v8, v16 +; RV32I-NEXT: vand.vv v16, v0, v16 +; RV32I-NEXT: vsub.vv v8, v8, v16 +; RV32I-NEXT: vand.vv v16, v8, v24 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: vadd.vv v8, v24, v8 +; RV32I-NEXT: vand.vv v8, v8, v24 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v0 @@ -1892,7 +1904,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v24 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_nxv8i64: @@ -3265,40 +3277,43 @@ define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) { ; RV32I-LABEL: cttz_zero_undef_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32I-NEXT: vsub.vx v9, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT:
vlse64.v v9, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vsrl.vi v11, v8, 1 -; RV32I-NEXT: vand.vv v10, v11, v10 -; RV32I-NEXT: vsub.vv v8, v8, v10 -; RV32I-NEXT: vand.vv v10, v8, v9 +; RV32I-NEXT: vand.vv v9, v11, v9 +; RV32I-NEXT: vsub.vv v8, v8, v9 +; RV32I-NEXT: vand.vv v9, v8, v10 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v9 -; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: vadd.vv v8, v9, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v9, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v10, (a0), zero ; RV32I-NEXT: vsrl.vi v11, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v11 @@ -3306,7 +3321,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v10 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_zero_undef_nxv1i64: @@ -3385,40 +3400,43 @@ define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) { ; RV32I-LABEL: cttz_zero_undef_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; RV32I-NEXT: vsub.vx v10, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vsrl.vi v14, v8, 1 -; RV32I-NEXT: vand.vv v12, v14, v12 -; RV32I-NEXT: vsub.vv v8, v8, v12 -; RV32I-NEXT: vand.vv v12, v8, v10 +; RV32I-NEXT: vand.vv v10, v14, v10 +; RV32I-NEXT: vsub.vv v8, v8, v10 +; RV32I-NEXT: vand.vv v10, v8, v12 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v10 -; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: vadd.vv v8, v10, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v10, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v12, (a0), zero ; RV32I-NEXT: vsrl.vi v14, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v14 @@ -3426,7 +3444,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v12 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_zero_undef_nxv2i64: @@ -3505,40 +3523,43 @@ define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) { ; RV32I-LABEL: cttz_zero_undef_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ;
RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32I-NEXT: vsub.vx v12, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vsrl.vi v20, v8, 1 -; RV32I-NEXT: vand.vv v16, v20, v16 -; RV32I-NEXT: vsub.vv v8, v8, v16 -; RV32I-NEXT: vand.vv v16, v8, v12 +; RV32I-NEXT: vand.vv v12, v20, v12 +; RV32I-NEXT: vsub.vv v8, v8, v12 +; RV32I-NEXT: vand.vv v12, v8, v16 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v12 -; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: vadd.vv v8, v12, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v12, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v16, (a0), zero ; RV32I-NEXT: vsrl.vi v20, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v20 @@ -3546,7 +3567,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v16 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_zero_undef_nxv4i64: @@ -3625,40 +3646,43 @@ define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) { ; RV32I-LABEL: cttz_zero_undef_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 4(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32I-NEXT: vsub.vx v16, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 1 -; RV32I-NEXT: vand.vv v24, v0, v24 -; RV32I-NEXT: vsub.vv v8, v8, v24 -; RV32I-NEXT: vand.vv v24, v8, v16 +; RV32I-NEXT: vand.vv v16, v0, v16 +; RV32I-NEXT: vsub.vv v8, v8, v16 +; RV32I-NEXT: vand.vv v16, v8, v24 ; RV32I-NEXT: vsrl.vi v8, v8, 2 -; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: vadd.vv v8, v24, v8 +; RV32I-NEXT: vand.vv v8, v8, v24 +; RV32I-NEXT: vadd.vv v8, v16, v8 +; RV32I-NEXT: addi a0, sp, 8 ; RV32I-NEXT: vlse64.v v16, (a0), zero +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8,
v0 @@ -3666,7 +3690,7 @@ ; RV32I-NEXT: vmul.vv v8, v8, v24 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_zero_undef_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -1771,36 +1771,37 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsub.vx v9, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1810,17 +1811,19 @@ ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv1i64: @@ -1867,36 +1870,37 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsub.vx v9, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1906,17 +1910,19 @@ ; RV32-NEXT: vadd.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv1i64_unmasked: @@ -1967,36 +1973,37 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsub.vx v10, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsub.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2006,17 +2013,19 @@ ; RV32-NEXT: vadd.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv2i64: @@ -2063,36 +2072,37 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64_unmasked(<vscale x 2 x i64>
%va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsub.vx v10, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsub.vv v8, v8, v10 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma @@ -2102,17 +2112,19 @@ ; RV32-NEXT: vadd.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv2i64_unmasked: @@ -2163,36 +2175,37 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsub.vx v12, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero,
e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2202,17 +2215,19 @@ ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v12, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv4i64: @@ -2259,36 +2274,37 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsub.vx v12, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsub.vv v8, v8, v12 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2298,17 +2314,19 @@ ; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v12 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv4i64_unmasked: @@ -2359,36 +2377,37 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv7i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw
a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2398,17 +2417,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv7i64: @@ -2455,36 +2476,37 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv7i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2494,17 +2516,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64,
m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv7i64_unmasked: @@ -2555,36 +2579,37 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2594,17 +2619,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv8i64: @@ -2651,36 +2678,37 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8,
v8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2690,17 +2718,19 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv8i64_unmasked: @@ -2751,13 +2781,13 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a2, a1, 3 @@ -2765,20 +2795,20 @@ ; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 44(sp) +; RV32-NEXT: sw a2, 40(sp) ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: sw a2, 32(sp) ; RV32-NEXT: lui a2, 61681 ; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 28(sp) +; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: sw a2, 12(sp) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 ; RV32-NEXT: addi a3, a3, -1 @@ -2789,113 +2819,101 @@ ; RV32-NEXT: vnot.v v16, v16, v0.t ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 40 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -;
RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 32 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: 
vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: addi a4, sp, 24 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vmul.vv v16, v24, v16, v0.t ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsrl.vx v16, v16, a3, v0.t -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB46_2 ; RV32-NEXT: # %bb.1: @@ -2908,16 +2926,17 @@ ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -2926,26 +2945,25 @@ ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: 
li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv16i64: @@ -3046,28 +3064,28 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 @@ -3079,18 +3097,19 @@ ; RV32-NEXT: vnot.v v16, v16 ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: addi a4, sp, 40 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v0, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v24, v0 ; RV32-NEXT: vsub.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 32 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -3100,27 +3119,29 @@ ; RV32-NEXT: vadd.vv v16, v24, v16 ; RV32-NEXT: vsrl.vi v24, v16, 4 ; RV32-NEXT: vadd.vv v16, v16, v24 +; RV32-NEXT: addi a4, sp, 24 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vmul.vv v24, v24, v16 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsrl.vx v16, v24, a3 -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB47_2 ; RV32-NEXT: # %bb.1: @@ -3135,7 +3156,7 @@ ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vsub.vv v8, v8, v24 @@ -3148,22 +3169,22 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vsrl.vx v8, v8, a3 -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv16i64_unmasked: @@ -4981,36 +5002,37 @@ define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsub.vx v9, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsub.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -5020,17 +5042,19 @@ ; RV32-NEXT: vadd.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT:
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv1i64:
@@ -5077,36 +5101,37 @@
 define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vsub.vx v9, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v9
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a1, sp, 24
 ; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
 ; RV32-NEXT:    vlse64.v v10, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vsub.vv v8, v8, v9
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
 ; RV32-NEXT:    vlse64.v v9, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
@@ -5116,17 +5141,19 @@
 ; RV32-NEXT:    vadd.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v9
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
 ; RV32-NEXT:    vlse64.v v9, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v9
+; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
 ; RV32-NEXT:    vlse64.v v9, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked:
@@ -5176,36 +5203,37 @@
 define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a1, sp, 24
 ; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v12, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v10, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
@@ -5215,17 +5243,19 @@
 ; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v10, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v10, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv2i64:
@@ -5272,36 +5302,37 @@
 define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vsub.vx v10, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v10
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a1, sp, 24
 ; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v12, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vsub.vv v8, v8, v10
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v10, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
@@ -5311,17 +5342,19 @@
 ; RV32-NEXT:    vadd.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v10
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v10, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vlse64.v v10, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked:
@@ -5371,36 +5404,37 @@
 define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a1, sp, 24
 ; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v12, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
@@ -5410,17 +5444,19 @@
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v12, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v12, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv4i64:
@@ -5467,36 +5503,37 @@
 define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vsub.vx v12, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v12
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a1, sp, 24
 ; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v12
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v12, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
@@ -5506,17 +5543,19 @@
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v12
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v12, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
 ; RV32-NEXT:    vlse64.v v12, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked:
@@ -5566,36 +5605,37 @@
 define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv7i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a1, sp, 24
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v24, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
@@ -5605,17 +5645,19 @@
 ; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv7i64:
@@ -5662,36 +5704,37 @@
 define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsub.vx v16, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v16
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a1, sp, 24
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v24, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
@@ -5701,17 +5744,19 @@
 ; RV32-NEXT:    vadd.vv v8, v24, v8
 ; RV32-NEXT:    vsrl.vi v16, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked:
@@ -5761,36 +5806,37 @@
 define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a1, sp, 24
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v24, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
@@ -5800,17 +5846,19 @@
 ; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv8i64:
@@ -5857,36 +5905,37 @@
 define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    li a1, 1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsub.vx v16, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vand.vv v8, v8, v16
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    addi a1, sp, 24
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v24, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
@@ -5896,17 +5945,19 @@
 ; RV32-NEXT:    vadd.vv v8, v24, v8
 ; RV32-NEXT:    vsrl.vi v16, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    mv a1, sp
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked:
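Every splat of a 64-bit mask in these RV32 checks uses the same idiom: build the constant in a GPR, store the two 32-bit halves to an 8-byte stack slot, and broadcast the slot with a zero-strided vlse64.v. Before this patch all four masks shared 8(sp)/12(sp); after it each mask owns its own slot (24(sp), 16(sp), 8(sp), 0(sp) above, or 40(sp) down to 16(sp) in the nxv16i64 functions). A minimal input that produces the idiom (a sketch; the function name is made up):

; Splat of 0x5555555555555555 on riscv32 with +v. The emitted code, taken
; from the checks above (registers and offsets are the compiler's choice):
;   lui a1, 349525
;   addi a1, a1, 1365
;   sw a1, 28(sp)             ; high half (same value as the low half here)
;   sw a1, 24(sp)             ; low half
;   addi a1, sp, 24
;   vlse64.v v10, (a1), zero  ; stride 0 == broadcast the 64-bit slot
define <vscale x 8 x i64> @splat_mask() {
  %head = insertelement <vscale x 8 x i64> poison, i64 6148914691236517205, i64 0
  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i64> %splat
}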
@@ -5955,13 +6006,13 @@
 define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -48
+; RV32-NEXT:    .cfi_def_cfa_offset 48
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    li a2, 40
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
 ; RV32-NEXT:    vmv1r.v v1, v0
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    srli a2, a1, 3
@@ -5969,20 +6020,20 @@
 ; RV32-NEXT:    vslidedown.vx v0, v0, a2
 ; RV32-NEXT:    lui a2, 349525
 ; RV32-NEXT:    addi a2, a2, 1365
-; RV32-NEXT:    sw a2, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a2, 44(sp)
+; RV32-NEXT:    sw a2, 40(sp)
 ; RV32-NEXT:    lui a2, 209715
 ; RV32-NEXT:    addi a2, a2, 819
-; RV32-NEXT:    sw a2, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a2, 36(sp)
+; RV32-NEXT:    sw a2, 32(sp)
 ; RV32-NEXT:    lui a2, 61681
 ; RV32-NEXT:    addi a2, a2, -241
-; RV32-NEXT:    sw a2, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a2, 28(sp)
+; RV32-NEXT:    sw a2, 24(sp)
 ; RV32-NEXT:    lui a2, 4112
 ; RV32-NEXT:    addi a2, a2, 257
-; RV32-NEXT:    sw a2, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a2, 20(sp)
+; RV32-NEXT:    sw a2, 16(sp)
 ; RV32-NEXT:    sub a2, a0, a1
 ; RV32-NEXT:    sltu a3, a0, a2
 ; RV32-NEXT:    addi a3, a3, -1
@@ -5993,113 +6044,101 @@
 ; RV32-NEXT:    vnot.v v16, v16, v0.t
 ; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 5
+; RV32-NEXT:    slli a4, a4, 4
 ; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    addi a4, a4, 48
 ; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
-; RV32-NEXT:    addi a4, sp, 8
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a4, sp, 40
 ; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a4), zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 4
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    li a5, 24
+; RV32-NEXT:    mul a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 5
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vl8r.v v16, (a5) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 4
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v16, v16, v24, v0.t
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 24
-; RV32-NEXT:    mul a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a4, sp, 32
 ; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a4), zero
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 5
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 24
-; RV32-NEXT:    mul a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vl8r.v v16, (a5) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 5
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 3
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 24
-; RV32-NEXT:    mul a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vl8r.v v16, (a5) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 24
-; RV32-NEXT:    mul a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 5
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vl8r.v v16, (a5) # Unknown-size Folded Reload
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 24
-; RV32-NEXT:    mul a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 4
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v24, v24, 2, v0.t
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 3
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vl8r.v v24, (a5) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 4
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vl8r.v v24, (a4) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v16, v24, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v24, v16, 4, v0.t
 ; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
+; RV32-NEXT:    addi a4, sp, 24
 ; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a4), zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 24
-; RV32-NEXT:    mul a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 4
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
+; RV32-NEXT:    addi a4, sp, 16
 ; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a4), zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vmul.vv v16, v24, v16, v0.t
 ; RV32-NEXT:    li a3, 56
 ; RV32-NEXT:    vsrl.vx v16, v16, a3, v0.t
-; RV32-NEXT:    addi a4, sp, 16
+; RV32-NEXT:    addi a4, sp, 48
 ; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    bltu a0, a1, .LBB94_2
 ; RV32-NEXT:  # %bb.1:
@@ -6112,16 +6151,17 @@
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
@@ -6130,26 +6170,25 @@
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    addi a0, sp, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 48
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv16i64:
@@ -6250,28 +6289,28 @@
 define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    addi sp, sp, -48
+; RV32-NEXT:    .cfi_def_cfa_offset 48
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 5
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
 ; RV32-NEXT:    lui a1, 349525
 ; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 44(sp)
+; RV32-NEXT:    sw a1, 40(sp)
 ; RV32-NEXT:    lui a1, 209715
 ; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 36(sp)
+; RV32-NEXT:    sw a1, 32(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 28(sp)
+; RV32-NEXT:    sw a1, 24(sp)
 ; RV32-NEXT:    lui a1, 4112
 ; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 16(sp)
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    sub a2, a0, a1
 ; RV32-NEXT:    sltu a3, a0, a2
@@ -6283,18 +6322,19 @@
 ; RV32-NEXT:    vnot.v v16, v16
 ; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vsrl.vi v24, v16, 1
-; RV32-NEXT:    addi a4, sp, 8
+; RV32-NEXT:    addi a4, sp, 40
 ; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v0, (a4), zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    li a6, 24
-; RV32-NEXT:    mul a5, a5, a6
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v0, (a5) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    li a5, 24
+; RV32-NEXT:    mul a4, a4, a5
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vs8r.v v0, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    vand.vv v24, v24, v0
 ; RV32-NEXT:    vsub.vv v16, v16, v24
+; RV32-NEXT:    addi a4, sp, 32
 ; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v0, (a4), zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
@@ -6304,27 +6344,29 @@
 ; RV32-NEXT:    vadd.vv v16, v24, v16
 ; RV32-NEXT:    vsrl.vi v24, v16, 4
 ; RV32-NEXT:    vadd.vv v16, v16, v24
+; RV32-NEXT:    addi a4, sp, 24
 ; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v24, (a4), zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a5, vlenb
-; RV32-NEXT:    slli a5, a5, 4
-; RV32-NEXT:    add a5, sp, a5
-; RV32-NEXT:    addi a5, a5, 16
-; RV32-NEXT:    vs8r.v v24, (a5) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 4
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 48
+; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    addi a4, sp, 16
 ; RV32-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a4), zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vmul.vv v24, v24, v16
 ; RV32-NEXT:    li a3, 56
 ; RV32-NEXT:    vsrl.vx v16, v24, a3
-; RV32-NEXT:    addi a4, sp, 16
+; RV32-NEXT:    addi a4, sp, 48
 ; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    bltu a0, a1, .LBB95_2
 ; RV32-NEXT:  # %bb.1:
@@ -6339,7 +6381,7 @@
 ; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v24, v24, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v24
@@ -6352,22 +6394,22 @@
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    vsrl.vx v8, v8, a3
-; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    addi a0, sp, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    addi sp, sp, 48
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)
+
+define i32 @splat_vector_split_i64() {
+; CHECK-LABEL: splat_vector_split_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    li a0, 3
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 3
+; CHECK-NEXT:    sw zero, 4(sp)
+; CHECK-NEXT:    lui a0, 1044480
+; CHECK-NEXT:    sw a0, 0(sp)
+; CHECK-NEXT:    lui a0, 61681
+; CHECK-NEXT:    addi a0, a0, -241
+; CHECK-NEXT:    sw a0, 28(sp)
+; CHECK-NEXT:    sw a0, 24(sp)
+; CHECK-NEXT:    lui a0, 209715
+; CHECK-NEXT:    addi a0, a0, 819
+; CHECK-NEXT:    sw a0, 20(sp)
+; CHECK-NEXT:    sw a0, 16(sp)
+; CHECK-NEXT:    lui a0, 349525
+; CHECK-NEXT:    addi a0, a0, 1365
+; CHECK-NEXT:    sw a0, 12(sp)
+; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    li a0, 56
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsrl.vx v10, v8, a0
+; CHECK-NEXT:    li a1, 40
+; CHECK-NEXT:    vsrl.vx v12, v8, a1
+; CHECK-NEXT:    lui a2, 16
+; CHECK-NEXT:    addi a2, a2, -256
+; CHECK-NEXT:    vand.vx v12, v12, a2
+; CHECK-NEXT:    vor.vv v10, v12, v10
+; CHECK-NEXT:    vsrl.vi v12, v8, 24
+; CHECK-NEXT:    mv a3, sp
+; CHECK-NEXT:    vlse64.v v14, (a3), zero
+; CHECK-NEXT:    lui a3, 4080
+; CHECK-NEXT:    vand.vx v12, v12, a3
+; CHECK-NEXT:    vsrl.vi v16, v8, 8
+; CHECK-NEXT:    vand.vv v16, v16, v14
+; CHECK-NEXT:    vor.vv v12, v16, v12
+; CHECK-NEXT:    vor.vv v10, v12, v10
+; CHECK-NEXT:    vand.vv v12, v8, v14
+; CHECK-NEXT:    vsll.vi v12, v12, 8
+; CHECK-NEXT:    vand.vx v14, v8, a3
+; CHECK-NEXT:    vsll.vi v14, v14, 24
+; CHECK-NEXT:    vor.vv v12, v14, v12
+; CHECK-NEXT:    vsll.vx v14, v8, a0
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vsll.vx v8, v8, a1
+; CHECK-NEXT:    vor.vv v8, v14, v8
+; CHECK-NEXT:    addi a0, sp, 24
+; CHECK-NEXT:    vlse64.v v14, (a0), zero
+; CHECK-NEXT:    vor.vv v8, v8, v12
+; CHECK-NEXT:    vor.vv v8, v8, v10
+; CHECK-NEXT:    vsrl.vi v10, v8, 4
+; CHECK-NEXT:    vand.vv v10, v10, v14
+; CHECK-NEXT:    vand.vv v8, v8, v14
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vlse64.v v12, (a0), zero
+; CHECK-NEXT:    vsll.vi v8, v8, 4
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 2
+; CHECK-NEXT:    vand.vv v10, v10, v12
+; CHECK-NEXT:    vand.vv v8, v8, v12
+; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    vlse64.v v12, (a0), zero
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsrl.vi v10, v8, 1
+; CHECK-NEXT:    vand.vv v10, v10, v12
+; CHECK-NEXT:    vand.vv v8, v8, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vor.vv v8, v10, v8
+; CHECK-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 3
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    ret
+  %1 = insertelement <vscale x 2 x i64> zeroinitializer, i64 3, i64 3
+  %2 = tail call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> %1)
+  %3 = extractelement <vscale x 2 x i64> %2, i32 3
+  %4 = trunc i64 %3 to i32
+  ret i32 %4
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
@@ -658,7 +658,7 @@
 ; RV32-NEXT:    lui a1, 11557
 ; RV32-NEXT:    addi a1, a1, -683
 ; RV32-NEXT:    mul a1, a0, a1
-; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    srli a0, a0, 3
 ; RV32-NEXT:    li a1, 62
 ; RV32-NEXT:    mul a1, a0, a1
@@ -666,10 +666,11 @@
 ; RV32-NEXT:    addi a2, a2, -1368
 ; RV32-NEXT:    mulhu a0, a0, a2
 ; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sw a0, 12(sp)
+; RV32-NEXT:    sw a0, 4(sp)
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v8, (a0), zero
+; RV32-NEXT:    mv a0, sp
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vid.v v24
 ; RV32-NEXT:    vmul.vv v8, v24, v8
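The new splat-vector-split-i64-vl-sdnode.ll file is the regression test itself: splitting a <vscale x 2 x i64> bitreverse keeps several distinct 64-bit splat constants live against the same frame, and the checks pin each one to its own slot (0(sp)/4(sp), 8(sp), 16(sp), 24(sp), 28(sp)) instead of letting every expansion reuse 8(sp)/12(sp). A reduced sketch of the shape being guarded (hypothetical function, not from the patch):

; Two splat constants live at once. If both expansions shared a single stack
; temporary, the second pair of sw stores could overwrite the first constant
; before its vlse64.v broadcast reads it back; with a per-node temporary each
; constant keeps its own 8-byte slot.
define <vscale x 2 x i64> @two_live_splats(<vscale x 2 x i64> %v) {
  %h1 = insertelement <vscale x 2 x i64> poison, i64 6148914691236517205, i64 0
  %m1 = shufflevector <vscale x 2 x i64> %h1, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %h2 = insertelement <vscale x 2 x i64> poison, i64 3689348814741910323, i64 0
  %m2 = shufflevector <vscale x 2 x i64> %h2, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %lo = and <vscale x 2 x i64> %v, %m1
  %hi = and <vscale x 2 x i64> %v, %m2
  %r  = or <vscale x 2 x i64> %lo, %hi
  ret <vscale x 2 x i64> %r
}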
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
@@ -870,8 +870,9 @@
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v8, (a0), zero
-; RV32-NEXT:    sw a3, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a3, 4(sp)
+; RV32-NEXT:    sw a2, 0(sp)
+; RV32-NEXT:    mv a0, sp
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
@@ -1361,8 +1361,9 @@
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v8, (a0), zero
-; RV32-NEXT:    sw a3, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a3, 4(sp)
+; RV32-NEXT:    sw a2, 0(sp)
+; RV32-NEXT:    mv a0, sp
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vand.vv v8, v8, v16
 ; RV32-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
@@ -932,8 +932,9 @@
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v8, (a0), zero
-; RV32-NEXT:    sw a3, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a3, 4(sp)
+; RV32-NEXT:    sw a2, 0(sp)
+; RV32-NEXT:    mv a0, sp
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
@@ -1154,8 +1154,9 @@
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v8, (a0), zero
-; RV32-NEXT:    sw a3, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a3, 4(sp)
+; RV32-NEXT:    sw a2, 0(sp)
+; RV32-NEXT:    mv a0, sp
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vor.vv v8, v8, v16
 ; RV32-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
@@ -848,8 +848,9 @@
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v8, (a0), zero
-; RV32-NEXT:    sw a3, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a3, 4(sp)
+; RV32-NEXT:    sw a2, 0(sp)
+; RV32-NEXT:    mv a0, sp
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vsub.vv v8, v8, v16
 ; RV32-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
@@ -1361,8 +1361,9 @@
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v8, (a0), zero
-; RV32-NEXT:    sw a3, 12(sp)
-; RV32-NEXT:    sw a2, 8(sp)
+; RV32-NEXT:    sw a3, 4(sp)
+; RV32-NEXT:    sw a2, 0(sp)
+; RV32-NEXT:    mv a0, sp
 ; RV32-NEXT:    vlse64.v v16, (a0), zero
 ; RV32-NEXT:    vxor.vv v8, v8, v16
 ; RV32-NEXT:    addi sp, sp, 16
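The vadd/vand/vmul/vor/vsub/vxor hunks all come from the same test shape: two runtime i64 scalars are splat and combined, so RV32 needs two stack temporaries. The first splat keeps using 8(sp) (its halves are stored before these hunks), while the second now gets its own slot at 0(sp)/4(sp) for the a2/a3 halves, hence the added mv a0, sp. A sketch of such an input (assumed shape; the function name is illustrative):

; Both operands are splats of scalar i64 arguments. On riscv32 each i64
; lives in a GPR pair, so each splat stores its two halves to a stack slot
; and broadcasts it with a zero-stride vlse64.v; with this patch the two
; splats use distinct slots (8(sp) and 0(sp)) instead of sharing one.
define <vscale x 8 x i64> @vadd_two_splats(i64 %a, i64 %b) {
  %ha = insertelement <vscale x 8 x i64> poison, i64 %a, i64 0
  %sa = shufflevector <vscale x 8 x i64> %ha, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %hb = insertelement <vscale x 8 x i64> poison, i64 %b, i64 0
  %sb = shufflevector <vscale x 8 x i64> %hb, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %r  = add <vscale x 8 x i64> %sa, %sb
  ret <vscale x 8 x i64> %r
}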