diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -968,6 +968,17 @@
     return std::make_pair(0, RVVStackAlign);
   }
 
+  // All offsets here are multiplied by VLENB, which carries with it its own
+  // alignment. We can take this into account to avoid over-aligning the stack.
+  // Since VLEN is always a power of two greater than or equal to 32, knowing
+  // the minimum VLEN is enough to ensure the same alignment with larger VLENs.
+  auto VLenBits = ST.getRealMinVLen();
+  const unsigned VLenKnown8ByteMultiple = std::max(VLenBits, 64u) / 64;
+
+  auto AlignWithImplicitVLenAlign = [VLenKnown8ByteMultiple](Align A) {
+    return MaybeAlign(A.value() / VLenKnown8ByteMultiple).valueOrOne();
+  };
+
   // Allocate all RVV locals and spills
   int64_t Offset = 0;
   for (int FI : ObjectsToAllocate) {
@@ -978,7 +989,8 @@
     // register for it.
     if (ObjectSize < 8)
       ObjectSize = 8;
-    Offset = alignTo(Offset + ObjectSize, ObjectAlign);
+    Offset =
+        alignTo(Offset + ObjectSize, AlignWithImplicitVLenAlign(ObjectAlign));
     MFI.setObjectOffset(FI, -Offset);
     // Update the maximum alignment of the RVV stack section
     RVVStackAlign = std::max(RVVStackAlign, ObjectAlign);
@@ -988,7 +1000,8 @@
   // object right at the bottom (i.e., any padding at the top of the frame),
   // readjust all RVV objects down by the alignment padding.
   uint64_t StackSize = Offset;
-  if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
+  if (auto AlignmentPadding = offsetToAlignment(
+          StackSize, AlignWithImplicitVLenAlign(RVVStackAlign))) {
     StackSize += AlignmentPadding;
     for (int FI : ObjectsToAllocate)
       MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
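Note: a standalone sketch of the rounding logic added above, with LLVM's Align/MaybeAlign modeled as plain integers (the helper name and the printf driver are illustrative, not part of the patch). RVV frame offsets are byte counts that the backend later scales by vlenb/8, so once a minimum VLEN is known, part of any requested alignment is provided implicitly by that scaling:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Mirrors AlignWithImplicitVLenAlign: alignments are powers of two, and
// MaybeAlign(0).valueOrOne() collapses a fully-implicit alignment to 1.
uint64_t alignWithImplicitVLenAlign(uint64_t AlignBytes, unsigned MinVLenBits) {
  assert(AlignBytes != 0 && (AlignBytes & (AlignBytes - 1)) == 0);
  unsigned VLenKnown8ByteMultiple = std::max(MinVLenBits, 64u) / 64;
  uint64_t Reduced = AlignBytes / VLenKnown8ByteMultiple;
  return Reduced ? Reduced : 1;
}

int main() {
  // Zvl128b (-mattr=+v): vlenb >= 16, so 16-byte alignment is implicit.
  printf("%llu\n", (unsigned long long)alignWithImplicitVLenAlign(16, 128)); // 8
  // Zvl64b (zve64x): vlenb >= 8, so nothing below 16 can be relaxed.
  printf("%llu\n", (unsigned long long)alignWithImplicitVLenAlign(16, 64));  // 16
  return 0;
}

With +v this is why a single vector-register spill slot is now sized 1 * vlenb instead of being padded to 2 * vlenb: the slot occupies 8 offset bytes, and the RVV section's 16-byte alignment is already guaranteed by vlenb >= 16. That is the `slli a0, a0, 1` disappearing throughout the tests below, and the 52 -> 51 and 10 -> 9 multiplier changes wherever the padding shrank by one vlenb.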
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -17,10 +17,10 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
 ; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
 ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
 ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
 ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
@@ -82,8 +82,8 @@
 ; CHECK-NEXT: addi a0, a0, %lo(var_47)
 ; CHECK-NEXT: vsseg4e16.v v10, (a0)
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
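The `.cfi_escape` comments above change in lockstep with the code: the escape bytes form a DW_CFA_def_cfa_expression whose multiplier operand drops from 0x0a to 0x09. As a sanity check, here is my reading of the byte string (illustrative, not LLVM code; the vlenb DWARF register number 7202 = 4096 + 0xC22 follows the RISC-V convention of mapping CSRs to DWARF registers 4096 + csrno):

#include <cstdint>
#include <cstdio>

// Decode one unsigned LEB128 value, advancing the cursor.
static uint64_t uleb128(const uint8_t *&P) {
  uint64_t V = 0;
  unsigned Shift = 0;
  for (;; Shift += 7) {
    uint8_t B = *P++;
    V |= uint64_t(B & 0x7f) << Shift;
    if (!(B & 0x80))
      return V;
  }
}

int main() {
  // Payload after "0x0f, 0x0d" (DW_CFA_def_cfa_expression, length 13):
  // DW_OP_breg2 0 (sp), DW_OP_consts 16, DW_OP_plus,
  // DW_OP_consts 9, DW_OP_bregx vlenb 0, DW_OP_mul, DW_OP_plus
  const uint8_t Expr[] = {0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09,
                          0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22};
  const uint8_t *P = &Expr[8]; // the ULEB128 operand of DW_OP_bregx
  printf("multiplier = %d, bregx register = %llu\n", Expr[6],
         (unsigned long long)uleb128(P)); // multiplier = 9, register = 7202
  return 0;
}

So the expression reads sp + 16 + 9 * vlenb, matching the updated # comment.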
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
--- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
@@ -2143,9 +2143,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -2221,7 +2220,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -2282,9 +2280,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -2314,7 +2311,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -2399,9 +2395,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -2458,7 +2453,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -2566,9 +2560,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2644,7 +2637,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -2705,9 +2697,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2737,7 +2728,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -2822,9 +2812,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -2881,7 +2870,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -5434,9 +5422,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -5501,7 +5488,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -5604,9 +5590,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -5644,7 +5629,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -5731,9 +5715,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -5792,7 +5775,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -5924,9 +5906,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -5991,7 +5972,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -6094,9 +6074,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -6134,7 +6113,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -6221,9 +6199,8 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 1 * vlenb
 ; CHECK-V-NEXT: addi a0, sp, 48
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -6282,7 +6259,6 @@
 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
@@ -33,9 +33,8 @@
 ; RV64IV-NEXT: addi sp, sp, -528
 ; RV64IV-NEXT: .cfi_def_cfa_offset 528
 ; RV64IV-NEXT: csrr a0, vlenb
-; RV64IV-NEXT: slli a0, a0, 1
 ; RV64IV-NEXT: sub sp, sp, a0
-; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 2 * vlenb
+; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 1 * vlenb
 ; RV64IV-NEXT: addi a0, sp, 8
 ; RV64IV-NEXT: vl1re64.v v8, (a0)
 ; RV64IV-NEXT: addi a0, sp, 528
@@ -44,7 +43,6 @@
 ; RV64IV-NEXT: vsetvli zero, a1, e64, m1, ta, ma
 ; RV64IV-NEXT: vadd.vv v8, v8, v9
 ; RV64IV-NEXT: csrr a0, vlenb
-; RV64IV-NEXT: slli a0, a0, 1
 ; RV64IV-NEXT: add sp, sp, a0
 ; RV64IV-NEXT: addi sp, sp, 528
 ; RV64IV-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -38,12 +38,10 @@
 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0
 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -240
 ; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB
- ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 1
 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
 ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
 ; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 6 /* e64 */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
 ; CHECK-NEXT: $x10 = PseudoReadVLENB
- ; CHECK-NEXT: $x10 = SLLI killed $x10, 1
 ; CHECK-NEXT: $x10 = SUB $x8, killed $x10
 ; CHECK-NEXT: $x10 = ADDI killed $x10, -2048
 ; CHECK-NEXT: $x10 = ADDI killed $x10, -224
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -8,10 +8,8 @@
 ; CHECK-LABEL: lmul1:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
 ; CHECK-NEXT: sub sp, sp, a0
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: ret
 %v = alloca
@@ -73,135 +71,249 @@
 }
 
 define void @lmul1_and_2() nounwind {
-; CHECK-LABEL: lmul1_and_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul1_and_2:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul1_and_2:
+; ZBA: # %bb.0:
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: add sp, sp, a0
+; ZBA-NEXT: ret
 %v1 = alloca
 %v2 = alloca
 ret void
 }
 
 define void @lmul2_and_4() nounwind {
-; CHECK-LABEL: lmul2_and_4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -48
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul2_and_4:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -48
+; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 48
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 6
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -48
+; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 48
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul2_and_4:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -48
+; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 48
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 1
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: addi sp, s0, -48
+; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 48
+; ZBA-NEXT: ret
 %v1 = alloca
 %v2 = alloca
 ret void
 }
 
 define void @lmul1_and_4() nounwind {
-; CHECK-LABEL: lmul1_and_4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -48
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul1_and_4:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -48
+; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 48
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 6
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -48
+; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 48
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul1_and_4:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -48
+; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 48
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 1
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: addi sp, s0, -48
+; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 48
+; ZBA-NEXT: ret
 %v1 = alloca
 %v2 = alloca
 ret void
 }
 
 define void @lmul2_and_1() nounwind {
-; CHECK-LABEL: lmul2_and_1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul2_and_1:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul2_and_1:
+; ZBA: # %bb.0:
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: add sp, sp, a0
+; ZBA-NEXT: ret
 %v1 = alloca
 %v2 = alloca
 ret void
 }
 
 define void @lmul4_and_1() nounwind {
-; CHECK-LABEL: lmul4_and_1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -48
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul4_and_1:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -48
+; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 48
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 6
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -48
+; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 48
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul4_and_1:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -48
+; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 48
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 1
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: addi sp, s0, -48
+; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 48
+; ZBA-NEXT: ret
 %v1 = alloca
 %v2 = alloca
 ret void
 }
 
 define void @lmul4_and_2() nounwind {
-; CHECK-LABEL: lmul4_and_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -48
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul4_and_2:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -48
+; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 48
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 6
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -48
+; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 48
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul4_and_2:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -48
+; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 48
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 1
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: addi sp, s0, -48
+; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 48
+; ZBA-NEXT: ret
 %v1 = alloca
 %v2 = alloca
 ret void
 }
 
 define void @lmul4_and_2_x2_0() nounwind {
-; CHECK-LABEL: lmul4_and_2_x2_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -48
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul4_and_2_x2_0:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -48
+; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 48
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 12
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -48
+; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 48
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul4_and_2_x2_0:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -48
+; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 48
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 2
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: addi sp, s0, -48
+; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 48
+; ZBA-NEXT: ret
 %v1 = alloca
 %v2 = alloca
 %v3 = alloca
@@ -252,19 +364,35 @@
 define void @gpr_and_lmul1_and_2() nounwind {
-; CHECK-LABEL: gpr_and_lmul1_and_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: li a0, 3
-; CHECK-NEXT: sd a0, 8(sp)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
+; NOZBA-LABEL: gpr_and_lmul1_and_2:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -16
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: li a0, 3
+; NOZBA-NEXT: sd a0, 8(sp)
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: addi sp, sp, 16
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: gpr_and_lmul1_and_2:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -16
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: li a0, 3
+; ZBA-NEXT: sd a0, 8(sp)
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: add sp, sp, a0
+; ZBA-NEXT: addi sp, sp, 16
+; ZBA-NEXT: ret
 %x1 = alloca i64
 %v1 = alloca
 %v2 = alloca
@@ -273,23 +401,43 @@
 }
 
 define void @gpr_and_lmul1_and_4() nounwind {
-; CHECK-LABEL: gpr_and_lmul1_and_4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: li a0, 3
-; CHECK-NEXT: sd a0, 8(sp)
-; CHECK-NEXT: addi sp, s0, -48
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
-; CHECK-NEXT: ret
+; NOZBA-LABEL: gpr_and_lmul1_and_4:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -48
+; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 48
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 6
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: li a0, 3
+; NOZBA-NEXT: sd a0, 8(sp)
+; NOZBA-NEXT: addi sp, s0, -48
+; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 48
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: gpr_and_lmul1_and_4:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -48
+; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 48
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 1
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: li a0, 3
+; ZBA-NEXT: sd a0, 8(sp)
+; ZBA-NEXT: addi sp, s0, -48
+; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 48
+; ZBA-NEXT: ret
 %x1 = alloca i64
 %v1 = alloca
 %v2 = alloca
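One side effect visible above: once the RVV section is 3 * vlenb (or 6 or 12) instead of a power of two, materializing the size depends on Zba, hence the new NOZBA/ZBA prefixes. A rough C++ model of the sequences in the checks (sh1add semantics per the Zba spec, rd = (rs1 << 1) + rs2; the variable names are mine):

#include <cassert>
#include <cstdint>

// sh1add rd, rs1, rs2 (Zba): rd = (rs1 << 1) + rs2
static uint64_t sh1add(uint64_t Rs1, uint64_t Rs2) { return (Rs1 << 1) + Rs2; }

int main() {
  uint64_t VLenB = 16; // vlenb when VLEN = 128
  // NOZBA, 3 * vlenb: slli a1, a0, 1; add a0, a1, a0
  assert(((VLenB << 1) + VLenB) == 3 * VLenB);
  // NOZBA, 9 * vlenb: slli a1, a0, 3; add a0, a1, a0 (replaces li+mul)
  assert(((VLenB << 3) + VLenB) == 9 * VLenB);
  // ZBA, 3 * vlenb: sh1add a0, a0, a0
  assert(sh1add(VLenB, VLenB) == 3 * VLenB);
  // ZBA, 6 * vlenb: slli a0, a0, 1; sh1add a0, a0, a0
  assert(sh1add(VLenB << 1, VLenB << 1) == 6 * VLenB);
  // ZBA, 12 * vlenb: slli a0, a0, 2; sh1add a0, a0, a0
  assert(sh1add(VLenB << 2, VLenB << 2) == 12 * VLenB);
  return 0;
}

Without Zba, the 6 * vlenb and 12 * vlenb cases keep the li/mul form, which stays at two instructions.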
diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
--- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
@@ -83,7 +83,7 @@
 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0
 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -272
 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB
- ; CHECK-NEXT: $x11 = frame-setup ADDI killed $x0, 52
+ ; CHECK-NEXT: $x11 = frame-setup ADDI killed $x0, 51
 ; CHECK-NEXT: $x10 = frame-setup MUL killed $x10, killed $x11
 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10
 ; CHECK-NEXT: $x2 = frame-setup ANDI $x2, -128
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -2393,9 +2393,8 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 1
 ; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
 ; RV64-NEXT: addi a2, sp, 16
 ; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
@@ -2423,7 +2422,6 @@
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 1
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -2465,9 +2463,8 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 1
 ; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
 ; RV64-NEXT: addi a2, sp, 16
 ; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
@@ -2495,7 +2492,6 @@
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 1
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
@@ -1788,10 +1788,10 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: li a4, 10
-; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: slli a4, a3, 3
+; RV64-NEXT: add a3, a4, a3
 ; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
 ; RV64-NEXT: li a3, 32
 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
 ; RV64-NEXT: vle32.v v24, (a1)
@@ -1831,8 +1831,8 @@
 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: li a1, 10
-; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -1873,10 +1873,10 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: li a4, 10
-; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: slli a4, a3, 3
+; RV64-NEXT: add a3, a4, a3
 ; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
 ; RV64-NEXT: li a3, 32
 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
 ; RV64-NEXT: vle32.v v24, (a1)
@@ -1917,8 +1917,8 @@
 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: li a1, 10
-; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -1960,10 +1960,10 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: li a4, 10
-; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: slli a4, a3, 3
+; RV64-NEXT: add a3, a4, a3
 ; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
 ; RV64-NEXT: li a3, 32
 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
 ; RV64-NEXT: vle32.v v24, (a1)
@@ -2004,8 +2004,8 @@
 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: li a1, 10
-; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
--- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
@@ -16,7 +16,6 @@
 ; CHECK-NEXT: addi s0, sp, 96
 ; CHECK-NEXT: .cfi_def_cfa s0, 0
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: andi sp, sp, -16
 ; CHECK-NEXT: mv s1, sp
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
@@ -9,7 +9,6 @@
 ; SPILL-O0: # %bb.0: # %entry
 ; SPILL-O0-NEXT: addi sp, sp, -16
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: sub sp, sp, a0
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -18,7 +17,6 @@
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: add sp, sp, a0
 ; SPILL-O0-NEXT: addi sp, sp, 16
 ; SPILL-O0-NEXT: ret
@@ -27,7 +25,6 @@
 ; SPILL-O2: # %bb.0: # %entry
 ; SPILL-O2-NEXT: addi sp, sp, -16
 ; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 1
 ; SPILL-O2-NEXT: sub sp, sp, a0
 ; SPILL-O2-NEXT: addi a0, sp, 16
 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -35,7 +32,6 @@
 ; SPILL-O2-NEXT: #NO_APP
 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 1
 ; SPILL-O2-NEXT: add sp, sp, a0
 ; SPILL-O2-NEXT: addi sp, sp, 16
 ; SPILL-O2-NEXT: ret
@@ -51,7 +47,6 @@
 ; SPILL-O0: # %bb.0: # %entry
 ; SPILL-O0-NEXT: addi sp, sp, -16
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: sub sp, sp, a0
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -60,7 +55,6 @@
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: add sp, sp, a0
 ; SPILL-O0-NEXT: addi sp, sp, 16
 ; SPILL-O0-NEXT: ret
@@ -69,7 +63,6 @@
 ; SPILL-O2: # %bb.0: # %entry
 ; SPILL-O2-NEXT: addi sp, sp, -16
 ; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 1
 ; SPILL-O2-NEXT: sub sp, sp, a0
 ; SPILL-O2-NEXT: addi a0, sp, 16
 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -77,7 +70,6 @@
 ; SPILL-O2-NEXT: #NO_APP
 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 1
 ; SPILL-O2-NEXT: add sp, sp, a0
 ; SPILL-O2-NEXT: addi sp, sp, 16
 ; SPILL-O2-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
@@ -9,7 +9,6 @@
 ; SPILL-O0: # %bb.0: # %entry
 ; SPILL-O0-NEXT: addi sp, sp, -16
 ; SPILL-O0-NEXT: csrr a2, vlenb
-; SPILL-O0-NEXT: slli a2, a2, 1
 ; SPILL-O0-NEXT: sub sp, sp, a2
 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
 ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
@@ -21,7 +20,6 @@
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: add sp, sp, a0
 ; SPILL-O0-NEXT: addi sp, sp, 16
 ; SPILL-O0-NEXT: ret
@@ -63,7 +61,6 @@
 ; SPILL-O0: # %bb.0: # %entry
 ; SPILL-O0-NEXT: addi sp, sp, -16
 ; SPILL-O0-NEXT: csrr a2, vlenb
-; SPILL-O0-NEXT: slli a2, a2, 1
 ; SPILL-O0-NEXT: sub sp, sp, a2
 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
@@ -75,7 +72,6 @@
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: add sp, sp, a0
 ; SPILL-O0-NEXT: addi sp, sp, 16
 ; SPILL-O0-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
@@ -9,7 +9,6 @@
 ; SPILL-O0: # %bb.0: # %entry
 ; SPILL-O0-NEXT: addi sp, sp, -16
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: sub sp, sp, a0
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -18,7 +17,6 @@
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: add sp, sp, a0
 ; SPILL-O0-NEXT: addi sp, sp, 16
 ; SPILL-O0-NEXT: ret
@@ -27,7 +25,6 @@
 ; SPILL-O2: # %bb.0: # %entry
 ; SPILL-O2-NEXT: addi sp, sp, -16
 ; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 1
 ; SPILL-O2-NEXT: sub sp, sp, a0
 ; SPILL-O2-NEXT: addi a0, sp, 16
 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -35,7 +32,6 @@
 ; SPILL-O2-NEXT: #NO_APP
 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 1
 ; SPILL-O2-NEXT: add sp, sp, a0
 ; SPILL-O2-NEXT: addi sp, sp, 16
 ; SPILL-O2-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
@@ -9,7 +9,6 @@
 ; SPILL-O0: # %bb.0: # %entry
 ; SPILL-O0-NEXT: addi sp, sp, -16
 ; SPILL-O0-NEXT: csrr a2, vlenb
-; SPILL-O0-NEXT: slli a2, a2, 1
 ; SPILL-O0-NEXT: sub sp, sp, a2
 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
 ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
@@ -21,7 +20,6 @@
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: add sp, sp, a0
 ; SPILL-O0-NEXT: addi sp, sp, 16
 ; SPILL-O0-NEXT: ret
@@ -63,7 +61,6 @@
 ; SPILL-O0: # %bb.0: # %entry
 ; SPILL-O0-NEXT: addi sp, sp, -16
 ; SPILL-O0-NEXT: csrr a2, vlenb
-; SPILL-O0-NEXT: slli a2, a2, 1
 ; SPILL-O0-NEXT: sub sp, sp, a2
 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
@@ -75,7 +72,6 @@
 ; SPILL-O0-NEXT: addi a0, sp, 16
 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
 ; SPILL-O0-NEXT: add sp, sp, a0
 ; SPILL-O0-NEXT: addi sp, sp, 16
 ; SPILL-O0-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
@@ -9,7 +9,8 @@
 ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: addi s0, sp, 32
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
 ; CHECK-NEXT: sub sp, sp, a2
 ; CHECK-NEXT: slli a0, a0, 2
 ; CHECK-NEXT: addi a0, a0, 15
@@ -21,7 +22,8 @@
 ; CHECK-NEXT: addi a2, a2, -32
 ; CHECK-NEXT: vl1re64.v v8, (a2)
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
 ; CHECK-NEXT: sub a2, s0, a2
 ; CHECK-NEXT: addi a2, a2, -32
 ; CHECK-NEXT: vl2re64.v v8, (a2)
@@ -54,12 +56,12 @@
 ; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: addi s0, sp, 128
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
 ; CHECK-NEXT: sub sp, sp, a0
 ; CHECK-NEXT: andi sp, sp, -64
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: slli a0, a0, 1
 ; CHECK-NEXT: add a0, sp, a0
 ; CHECK-NEXT: addi a0, a0, 112
 ; CHECK-NEXT: vl1re64.v v8, (a0)
@@ -92,7 +94,8 @@
 ; CHECK-NEXT: sd s1, 120(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: addi s0, sp, 144
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
 ; CHECK-NEXT: sub sp, sp, a2
 ; CHECK-NEXT: andi sp, sp, -64
 ; CHECK-NEXT: mv s1, sp
@@ -102,8 +105,7 @@
 ; CHECK-NEXT: sub a0, sp, a0
 ; CHECK-NEXT: mv sp, a0
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a3, a2, 1
-; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: slli a2, a2, 1
 ; CHECK-NEXT: add a2, s1, a2
 ; CHECK-NEXT: addi a2, a2, 112
 ; CHECK-NEXT: vl1re64.v v8, (a2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir
@@ -1,12 +1,12 @@
 # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 # RUN: llc -mtriple riscv32 -mattr=+zve64x -start-before=prologepilog -o - \
-# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV32
+# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV32-VLEN64
 # RUN: llc -mtriple riscv32 -mattr=+v -start-before=prologepilog -o - \
-# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV32
+# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV32-VLEN128
 # RUN: llc -mtriple riscv64 -mattr=+zve64x -start-before=prologepilog -o - \
-# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV64
+# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV64-VLEN64
 # RUN: llc -mtriple riscv64 -mattr=+v -start-before=prologepilog -o - \
-# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV64
+# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV64-VLEN128
 --- |
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
 target triple = "riscv64"
@@ -14,41 +14,77 @@
 declare void @extern(*)
 
 define void @rvv_stack_align8() #0 {
- ; RV32-LABEL: rvv_stack_align8:
- ; RV32: # %bb.0:
- ; RV32-NEXT: addi sp, sp, -48
- ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
- ; RV32-NEXT: csrr a0, vlenb
- ; RV32-NEXT: slli a0, a0, 1
- ; RV32-NEXT: sub sp, sp, a0
- ; RV32-NEXT: addi a0, sp, 32
- ; RV32-NEXT: addi a1, sp, 16
- ; RV32-NEXT: addi a2, sp, 8
- ; RV32-NEXT: call extern@plt
- ; RV32-NEXT: csrr a0, vlenb
- ; RV32-NEXT: slli a0, a0, 1
- ; RV32-NEXT: add sp, sp, a0
- ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
- ; RV32-NEXT: addi sp, sp, 48
- ; RV32-NEXT: ret
+ ; RV32-VLEN64-LABEL: rvv_stack_align8:
+ ; RV32-VLEN64: # %bb.0:
+ ; RV32-VLEN64-NEXT: addi sp, sp, -48
+ ; RV32-VLEN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+ ; RV32-VLEN64-NEXT: csrr a0, vlenb
+ ; RV32-VLEN64-NEXT: slli a0, a0, 1
+ ; RV32-VLEN64-NEXT: sub sp, sp, a0
+ ; RV32-VLEN64-NEXT: addi a0, sp, 32
+ ; RV32-VLEN64-NEXT: addi a1, sp, 16
+ ; RV32-VLEN64-NEXT: addi a2, sp, 8
+ ; RV32-VLEN64-NEXT: call extern@plt
+ ; RV32-VLEN64-NEXT: csrr a0, vlenb
+ ; RV32-VLEN64-NEXT: slli a0, a0, 1
+ ; RV32-VLEN64-NEXT: add sp, sp, a0
+ ; RV32-VLEN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+ ; RV32-VLEN64-NEXT: addi sp, sp, 48
+ ; RV32-VLEN64-NEXT: ret
 ;
- ; RV64-LABEL: rvv_stack_align8:
- ; RV64: # %bb.0:
- ; RV64-NEXT: addi sp, sp, -48
- ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
- ; RV64-NEXT: csrr a0, vlenb
- ; RV64-NEXT: slli a0, a0, 1
- ; RV64-NEXT: sub sp, sp, a0
- ; RV64-NEXT: addi a0, sp, 32
- ; RV64-NEXT: addi a1, sp, 16
- ; RV64-NEXT: addi a2, sp, 8
- ; RV64-NEXT: call extern@plt
- ; RV64-NEXT: csrr a0, vlenb
- ; RV64-NEXT: slli a0, a0, 1
- ; RV64-NEXT: add sp, sp, a0
- ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
- ; RV64-NEXT: addi sp, sp, 48
- ; RV64-NEXT: ret
+ ; RV32-VLEN128-LABEL: rvv_stack_align8:
+ ; RV32-VLEN128: # %bb.0:
+ ; RV32-VLEN128-NEXT: addi sp, sp, -48
+ ; RV32-VLEN128-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+ ; RV32-VLEN128-NEXT: csrr a0, vlenb
+ ; RV32-VLEN128-NEXT: slli a0, a0, 1
+ ; RV32-VLEN128-NEXT: sub sp, sp, a0
+ ; RV32-VLEN128-NEXT: addi a0, sp, 32
+ ; RV32-VLEN128-NEXT: addi a1, sp, 16
+ ; RV32-VLEN128-NEXT: addi a2, sp, 8
+ ; RV32-VLEN128-NEXT: call extern@plt
+ ; RV32-VLEN128-NEXT: csrr a0, vlenb
+ ; RV32-VLEN128-NEXT: slli a0, a0, 1
+ ; RV32-VLEN128-NEXT: add sp, sp, a0
+ ; RV32-VLEN128-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+ ; RV32-VLEN128-NEXT: addi sp, sp, 48
+ ; RV32-VLEN128-NEXT: ret
+ ;
+ ; RV64-VLEN64-LABEL: rvv_stack_align8:
+ ; RV64-VLEN64: # %bb.0:
+ ; RV64-VLEN64-NEXT: addi sp, sp, -48
+ ; RV64-VLEN64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+ ; RV64-VLEN64-NEXT: csrr a0, vlenb
+ ; RV64-VLEN64-NEXT: slli a0, a0, 1
+ ; RV64-VLEN64-NEXT: sub sp, sp, a0
+ ; RV64-VLEN64-NEXT: addi a0, sp, 32
+ ; RV64-VLEN64-NEXT: addi a1, sp, 16
+ ; RV64-VLEN64-NEXT: addi a2, sp, 8
+ ; RV64-VLEN64-NEXT: call extern@plt
+ ; RV64-VLEN64-NEXT: csrr a0, vlenb
+ ; RV64-VLEN64-NEXT: slli a0, a0, 1
+ ; RV64-VLEN64-NEXT: add sp, sp, a0
+ ; RV64-VLEN64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+ ; RV64-VLEN64-NEXT: addi sp, sp, 48
+ ; RV64-VLEN64-NEXT: ret
+ ;
+ ; RV64-VLEN128-LABEL: rvv_stack_align8:
+ ; RV64-VLEN128: # %bb.0:
+ ; RV64-VLEN128-NEXT: addi sp, sp, -48
+ ; RV64-VLEN128-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+ ; RV64-VLEN128-NEXT: csrr a0, vlenb
+ ; RV64-VLEN128-NEXT: slli a0, a0, 1
+ ; RV64-VLEN128-NEXT: sub sp, sp, a0
+ ; RV64-VLEN128-NEXT: addi a0, sp, 32
+ ; RV64-VLEN128-NEXT: addi a1, sp, 16
+ ; RV64-VLEN128-NEXT: addi a2, sp, 8
+ ; RV64-VLEN128-NEXT: call extern@plt
+ ; RV64-VLEN128-NEXT: csrr a0, vlenb
+ ; RV64-VLEN128-NEXT: slli a0, a0, 1
+ ; RV64-VLEN128-NEXT: add sp, sp, a0
+ ; RV64-VLEN128-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+ ; RV64-VLEN128-NEXT: addi sp, sp, 48
+ ; RV64-VLEN128-NEXT: ret
 %a = alloca , align 8
 %b = alloca i64
 %c = alloca i64
@@ -57,41 +93,77 @@
 }
 
 define void @rvv_stack_align16() #0 {
- ; RV32-LABEL: rvv_stack_align16:
- ; RV32: # %bb.0:
- ; RV32-NEXT: addi sp, sp, -48
- ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
- ; RV32-NEXT: csrr a0, vlenb
- ; RV32-NEXT: slli a0, a0, 1
- ; RV32-NEXT: sub sp, sp, a0
- ; RV32-NEXT: addi a0, sp, 32
- ; RV32-NEXT: addi a1, sp, 16
- ; RV32-NEXT: addi a2, sp, 8
- ; RV32-NEXT: call extern@plt
- ; RV32-NEXT: csrr a0, vlenb
- ; RV32-NEXT: slli a0, a0, 1
- ; RV32-NEXT: add sp, sp, a0
- ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
- ; RV32-NEXT: addi sp, sp, 48
- ; RV32-NEXT: ret
+ ; RV32-VLEN64-LABEL: rvv_stack_align16:
+ ; RV32-VLEN64: # %bb.0:
+ ; RV32-VLEN64-NEXT: addi sp, sp, -48
+ ; RV32-VLEN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+ ; RV32-VLEN64-NEXT: csrr a0, vlenb
+ ; RV32-VLEN64-NEXT: slli a0, a0, 1
+ ; RV32-VLEN64-NEXT: sub sp, sp, a0
+ ; RV32-VLEN64-NEXT: addi a0, sp, 32
+ ; RV32-VLEN64-NEXT: addi a1, sp, 16
+ ; RV32-VLEN64-NEXT: addi a2, sp, 8
+ ; RV32-VLEN64-NEXT: call extern@plt
+ ; RV32-VLEN64-NEXT: csrr a0, vlenb
+ ; RV32-VLEN64-NEXT: slli a0, a0, 1
+ ; RV32-VLEN64-NEXT: add sp, sp, a0
+ ; RV32-VLEN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+ ; RV32-VLEN64-NEXT: addi sp, sp, 48
+ ; RV32-VLEN64-NEXT: ret
+ ;
+ ; RV32-VLEN128-LABEL: rvv_stack_align16:
+ ; RV32-VLEN128: # %bb.0:
+ ; RV32-VLEN128-NEXT: addi sp, sp, -48
+ ; RV32-VLEN128-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+ ; RV32-VLEN128-NEXT: csrr a0, vlenb
+ ; RV32-VLEN128-NEXT: slli a0, a0, 1
+ ; RV32-VLEN128-NEXT: sub sp, sp, a0
+ ; RV32-VLEN128-NEXT: addi a0, sp, 32
+ ; RV32-VLEN128-NEXT: addi a1, sp, 16
+ ; RV32-VLEN128-NEXT: addi a2, sp, 8
+ ; RV32-VLEN128-NEXT: call extern@plt
+ ; RV32-VLEN128-NEXT: csrr a0, vlenb
+ ; RV32-VLEN128-NEXT: slli a0, a0, 1
+ ; RV32-VLEN128-NEXT: add sp, sp, a0
+ ; RV32-VLEN128-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+ ; RV32-VLEN128-NEXT: addi sp, sp, 48
+ ; RV32-VLEN128-NEXT: ret
 ;
- ; RV64-LABEL: rvv_stack_align16:
- ; RV64: # %bb.0:
- ; RV64-NEXT: addi sp, sp, -48
- ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
- ; RV64-NEXT: csrr a0, vlenb
- ; RV64-NEXT: slli a0, a0, 1
- ; RV64-NEXT: sub sp, sp, a0
- ; RV64-NEXT: addi a0, sp, 32
- ; RV64-NEXT: addi a1, sp, 16
- ; RV64-NEXT: addi a2, sp, 8
- ; RV64-NEXT: call extern@plt
- ; RV64-NEXT: csrr a0, vlenb
- ; RV64-NEXT: slli a0, a0, 1
- ; RV64-NEXT: add sp, sp, a0
- ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
- ; RV64-NEXT: addi sp, sp, 48
- ; RV64-NEXT: ret
+ ; RV64-VLEN64-LABEL: rvv_stack_align16:
+ ; RV64-VLEN64: # %bb.0:
+ ; RV64-VLEN64-NEXT: addi sp, sp, -48
+ ; RV64-VLEN64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+ ; RV64-VLEN64-NEXT: csrr a0, vlenb
+ ; RV64-VLEN64-NEXT: slli a0, a0, 1
+ ; RV64-VLEN64-NEXT: sub sp, sp, a0
+ ; RV64-VLEN64-NEXT: addi a0, sp, 32
+ ; RV64-VLEN64-NEXT: addi a1, sp, 16
+ ; RV64-VLEN64-NEXT: addi a2, sp, 8
+ ; RV64-VLEN64-NEXT: call extern@plt
+ ; RV64-VLEN64-NEXT: csrr a0, vlenb
+ ; RV64-VLEN64-NEXT: slli a0, a0, 1
+ ; RV64-VLEN64-NEXT: add sp, sp, a0
+ ; RV64-VLEN64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+ ; RV64-VLEN64-NEXT: addi sp, sp, 48
+ ; RV64-VLEN64-NEXT: ret
+ ;
+ ; RV64-VLEN128-LABEL: rvv_stack_align16:
+ ; RV64-VLEN128: # %bb.0:
+ ; RV64-VLEN128-NEXT: addi sp, sp, -48
+ ; RV64-VLEN128-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+ ; RV64-VLEN128-NEXT: csrr a0, vlenb
+ ; RV64-VLEN128-NEXT: slli a0, a0, 1
+ ; RV64-VLEN128-NEXT: sub sp, sp, a0
+ ; RV64-VLEN128-NEXT: addi a0, sp, 32
+ ; RV64-VLEN128-NEXT: addi a1, sp, 16
+ ; RV64-VLEN128-NEXT: addi a2, sp, 8
+ ; RV64-VLEN128-NEXT: call extern@plt
+ ; RV64-VLEN128-NEXT: csrr a0, vlenb
+ ; RV64-VLEN128-NEXT: slli a0, a0, 1
+ ; RV64-VLEN128-NEXT: add sp, sp, a0
+ ; RV64-VLEN128-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+ ; RV64-VLEN128-NEXT: addi sp, sp, 48
+ ; RV64-VLEN128-NEXT: ret
 %a = alloca , align 16
 %b = alloca i64
 %c = alloca i64
@@ -100,45 +172,85 @@
 }
 
 define void @rvv_stack_align32() #0 {
- ; RV32-LABEL: rvv_stack_align32:
- ; RV32: # %bb.0:
- ; RV32-NEXT: addi sp, sp, -48
- ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
- ; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
- ; RV32-NEXT: addi s0, sp, 48
- ; RV32-NEXT: csrr a0, vlenb
- ; RV32-NEXT: slli a0, a0, 2
- ; RV32-NEXT: sub sp, sp, a0
- ; RV32-NEXT: andi sp, sp, -32
- ; RV32-NEXT: addi a0, sp, 32
- ; RV32-NEXT: addi a1, sp, 16
- ; RV32-NEXT: addi a2, sp, 8
- ; RV32-NEXT: call extern@plt
- ; RV32-NEXT: addi sp, s0, -48
- ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
- ; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
- ; RV32-NEXT: addi sp, sp, 48
- ; RV32-NEXT: ret
+ ; RV32-VLEN64-LABEL: rvv_stack_align32:
+ ; RV32-VLEN64: # %bb.0:
+ ; RV32-VLEN64-NEXT: addi sp, sp, -48
+ ; RV32-VLEN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+ ; RV32-VLEN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+ ; RV32-VLEN64-NEXT: addi s0, sp, 48
+ ; RV32-VLEN64-NEXT: csrr a0, vlenb
+ ; RV32-VLEN64-NEXT: slli a0, a0, 2
+ ; RV32-VLEN64-NEXT: sub sp, sp, a0
+ ; RV32-VLEN64-NEXT: andi sp, sp, -32
+ ; RV32-VLEN64-NEXT: addi a0, sp, 32
+ ; RV32-VLEN64-NEXT: addi a1, sp, 16
+ ; RV32-VLEN64-NEXT: addi a2, sp, 8
+ ; RV32-VLEN64-NEXT: call extern@plt
+ ; RV32-VLEN64-NEXT: addi sp, s0, -48
+ ; RV32-VLEN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+ ; RV32-VLEN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+ ; RV32-VLEN64-NEXT: addi sp, sp, 48
+ ; RV32-VLEN64-NEXT: ret
+ ;
+ ; RV32-VLEN128-LABEL: rvv_stack_align32:
+ ; RV32-VLEN128: # %bb.0:
+ ; RV32-VLEN128-NEXT: addi sp, sp, -48
+ ; RV32-VLEN128-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+ ; RV32-VLEN128-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+ ; RV32-VLEN128-NEXT: addi s0, sp, 48
+ ; RV32-VLEN128-NEXT: csrr a0, vlenb
+ ; RV32-VLEN128-NEXT: slli a0, a0, 1
+ ; RV32-VLEN128-NEXT: sub sp, sp, a0
+ ; RV32-VLEN128-NEXT: andi sp, sp, -32
+ ; RV32-VLEN128-NEXT: addi a0, sp, 32
+ ; RV32-VLEN128-NEXT: addi a1, sp, 16
+ ; RV32-VLEN128-NEXT: addi a2, sp, 8
+ ; RV32-VLEN128-NEXT: call extern@plt
+ ; RV32-VLEN128-NEXT: addi sp, s0, -48
+ ; RV32-VLEN128-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+ ; RV32-VLEN128-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+ ; RV32-VLEN128-NEXT: addi sp, sp, 48
+ ; RV32-VLEN128-NEXT: ret
+ ;
+ ; RV64-VLEN64-LABEL: rvv_stack_align32:
+ ; RV64-VLEN64: # %bb.0:
+ ; RV64-VLEN64-NEXT: addi sp, sp, -80
+ ; RV64-VLEN64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+ ; RV64-VLEN64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+ ; RV64-VLEN64-NEXT: addi s0, sp, 80
+ ; RV64-VLEN64-NEXT: csrr a0, vlenb
+ ; RV64-VLEN64-NEXT: slli a0, a0, 2
+ ; RV64-VLEN64-NEXT: sub sp, sp, a0
+ ; RV64-VLEN64-NEXT: andi sp, sp, -32
+ ; RV64-VLEN64-NEXT: addi a0, sp, 64
+ ; RV64-VLEN64-NEXT: addi a1, sp, 40
+ ; RV64-VLEN64-NEXT: addi a2, sp, 32
+ ; RV64-VLEN64-NEXT: call extern@plt
+ ; RV64-VLEN64-NEXT: addi sp, s0, -80
+ ; RV64-VLEN64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+ ; RV64-VLEN64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+ ; RV64-VLEN64-NEXT: addi sp, sp, 80
+ ; RV64-VLEN64-NEXT: ret
 ;
- ; RV64-LABEL: rvv_stack_align32:
- ; RV64: # %bb.0:
- ; RV64-NEXT: addi sp, sp, -80
- ; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
- ; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
- ; RV64-NEXT: addi s0, sp, 80
- ; RV64-NEXT: csrr a0, vlenb
- ; RV64-NEXT: slli a0, a0, 2
- ; RV64-NEXT: sub sp, sp, a0
- ; RV64-NEXT: andi sp, sp, -32
- ; RV64-NEXT: addi a0, sp, 64
- ; RV64-NEXT: addi a1, sp, 40
- ; RV64-NEXT: addi a2, sp, 32
- ; RV64-NEXT: call extern@plt
- ; RV64-NEXT: addi sp, s0, -80
- ; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
- ; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
- ; RV64-NEXT: addi sp, sp, 80
- ; RV64-NEXT: ret
+ ; RV64-VLEN128-LABEL: rvv_stack_align32:
+ ; RV64-VLEN128: # %bb.0:
+ ; RV64-VLEN128-NEXT: addi sp, sp, -80
+ ; RV64-VLEN128-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+ ; RV64-VLEN128-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+ ; RV64-VLEN128-NEXT: addi s0, sp, 80
+ ; RV64-VLEN128-NEXT: csrr a0, vlenb
+ ; RV64-VLEN128-NEXT: slli a0, a0, 1
+ ; RV64-VLEN128-NEXT: sub sp, sp, a0
+ ; RV64-VLEN128-NEXT: andi sp, sp, -32
+ ; RV64-VLEN128-NEXT: addi a0, sp, 64
+ ; RV64-VLEN128-NEXT: addi a1, sp, 40
+ ; RV64-VLEN128-NEXT: addi a2, sp, 32
+ ; RV64-VLEN128-NEXT: call extern@plt
+ ; RV64-VLEN128-NEXT: addi sp, s0, -80
+ ; RV64-VLEN128-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+ ; RV64-VLEN128-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+ ; RV64-VLEN128-NEXT: addi sp, sp, 80
+ ; RV64-VLEN128-NEXT: ret
 %a = alloca , align 32
 %b = alloca i64
 %c = alloca i64
diff --git a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll
--- a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefix=RV32
+; RUN: | FileCheck %s --check-prefix=RV32ZVE64X
 ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefix=RV64
+; RUN: | FileCheck %s --check-prefix=RV64ZVE64X
 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefix=RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
@@ -11,18 +11,50 @@
 ; FIXME: We are over-aligning the stack on V, wasting stack space.
 
 define ptr @scalar_stack_align16() nounwind {
+; RV32ZVE64X-LABEL: scalar_stack_align16:
+; RV32ZVE64X: # %bb.0:
+; RV32ZVE64X-NEXT: addi sp, sp, -48
+; RV32ZVE64X-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32ZVE64X-NEXT: csrr a0, vlenb
+; RV32ZVE64X-NEXT: slli a0, a0, 1
+; RV32ZVE64X-NEXT: sub sp, sp, a0
+; RV32ZVE64X-NEXT: addi a0, sp, 32
+; RV32ZVE64X-NEXT: call extern@plt
+; RV32ZVE64X-NEXT: addi a0, sp, 16
+; RV32ZVE64X-NEXT: csrr a1, vlenb
+; RV32ZVE64X-NEXT: slli a1, a1, 1
+; RV32ZVE64X-NEXT: add sp, sp, a1
+; RV32ZVE64X-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32ZVE64X-NEXT: addi sp, sp, 48
+; RV32ZVE64X-NEXT: ret
+;
+; RV64ZVE64X-LABEL: scalar_stack_align16:
+; RV64ZVE64X: # %bb.0:
+; RV64ZVE64X-NEXT: addi sp, sp, -48
+; RV64ZVE64X-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64ZVE64X-NEXT: csrr a0, vlenb
+; RV64ZVE64X-NEXT: slli a0, a0, 1
+; RV64ZVE64X-NEXT: sub sp, sp, a0
+; RV64ZVE64X-NEXT: addi a0, sp, 32
+; RV64ZVE64X-NEXT: call extern@plt
+; RV64ZVE64X-NEXT: addi a0, sp, 16
+; RV64ZVE64X-NEXT: csrr a1, vlenb
+; RV64ZVE64X-NEXT: slli a1, a1, 1
+; RV64ZVE64X-NEXT: add sp, sp, a1
+; RV64ZVE64X-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64ZVE64X-NEXT: addi sp, sp, 48
+; RV64ZVE64X-NEXT: ret
+;
 ; RV32-LABEL: scalar_stack_align16:
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -48
 ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
 ; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 1
 ; RV32-NEXT: sub sp, sp, a0
 ; RV32-NEXT: addi a0, sp, 32
 ; RV32-NEXT: call extern@plt
 ; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 1
 ; RV32-NEXT: add sp, sp, a1
 ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 48
@@ -33,13 +65,11 @@
 ; RV64-NEXT: addi sp, sp, -48
 ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 1
 ; RV64-NEXT: sub sp, sp, a0
 ; RV64-NEXT: addi a0, sp, 32
 ; RV64-NEXT: call extern@plt
 ; RV64-NEXT: addi a0, sp, 16
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 1
 ; RV64-NEXT: add sp, sp, a1
 ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64-NEXT: addi sp, sp, 48
diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir
--- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir
@@ -124,7 +124,7 @@
 ; CHECK-NEXT: - { id: 0, name: buf1, type: default, offset: -48, size: 1, alignment: 8,
 ; CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
 ; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- ; CHECK-NEXT: - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8,
+ ; CHECK-NEXT: - { id: 1, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8,
 ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
 ; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
 ; CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
@@ -155,17 +155,14 @@
 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x8, -32
 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x9, -40
 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB
- ; CHECK-NEXT: $x10 = frame-setup SLLI killed $x10, 1
 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10
- ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22
 ; CHECK-NEXT: renamable $x8 = COPY $x14
 ; CHECK-NEXT: renamable $x9 = COPY $x11
 ; CHECK-NEXT: $x10 = PseudoReadVLENB
- ; CHECK-NEXT: $x10 = SLLI killed $x10, 1
 ; CHECK-NEXT: $x10 = ADD $x2, killed $x10
 ; CHECK-NEXT: SD killed renamable $x17, killed $x10, 72 :: (store (s64))
 ; CHECK-NEXT: $x10 = PseudoReadVLENB
- ; CHECK-NEXT: $x10 = SLLI killed $x10, 1
 ; CHECK-NEXT: $x10 = ADD $x2, killed $x10
 ; CHECK-NEXT: SD killed renamable $x16, killed $x10, 64 :: (store (s64) into %fixed-stack.1, align 16)
 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype