diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -82,7 +82,7 @@
                          MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                          int64_t Amount, MachineInstr::MIFlag Flag) const;
   std::pair<int64_t, Align>
-  assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const;
+  assignRVVStackObjectOffsets(MachineFunction &MF) const;
 };
 }
 #endif
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -897,7 +897,8 @@
 }

 std::pair<int64_t, Align>
-RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const {
+RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   // Create a buffer of RVV objects to allocate.
   SmallVector<int, 8> ObjectsToAllocate;
   for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
@@ -910,10 +911,29 @@
     ObjectsToAllocate.push_back(I);
   }

-  // Allocate all RVV locals and spills
-  int64_t Offset = 0;
   // The minimum alignment is 16 bytes.
   Align RVVStackAlign(16);
+  const auto &ST = MF.getSubtarget<RISCVSubtarget>();
+
+  if (!ST.hasVInstructions()) {
+    assert(ObjectsToAllocate.empty() &&
+           "Can't allocate scalable-vector objects without V instructions");
+    return std::make_pair(0, RVVStackAlign);
+  }
+
+  // All offsets here are multiplied by VLENB, which carries with it its own
+  // alignment. We can take this into account to avoid over-aligning the stack.
+  // Since VLEN is always a power of two greater than 32, knowing the minimum
+  // VLEN is enough to ensure the same alignment with larger VLENs.
+  auto VLenBits = ST.getRealMinVLen();
+  const unsigned VLenKnown8ByteMultiple = std::max(VLenBits, 64u) / 64;
+
+  auto AlignWithImplicitVLenAlign = [VLenKnown8ByteMultiple](Align A) {
+    return MaybeAlign(A.value() / VLenKnown8ByteMultiple).valueOrOne();
+  };
+
+  // Allocate all RVV locals and spills
+  int64_t Offset = 0;
   for (int FI : ObjectsToAllocate) {
     // ObjectSize in bytes.
     int64_t ObjectSize = MFI.getObjectSize(FI);
@@ -922,7 +942,8 @@
     // register for it.
     if (ObjectSize < 8)
       ObjectSize = 8;
-    Offset = alignTo(Offset + ObjectSize, ObjectAlign);
+    Offset =
+        alignTo(Offset + ObjectSize, AlignWithImplicitVLenAlign(ObjectAlign));
     MFI.setObjectOffset(FI, -Offset);
     // Update the maximum alignment of the RVV stack section
     RVVStackAlign = std::max(RVVStackAlign, ObjectAlign);
@@ -932,7 +953,8 @@
   // object right at the bottom (i.e., any padding at the top of the frame),
   // readjust all RVV objects down by the alignment padding.
   uint64_t StackSize = Offset;
-  if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
+  if (auto AlignmentPadding = offsetToAlignment(
+          StackSize, AlignWithImplicitVLenAlign(RVVStackAlign))) {
     StackSize += AlignmentPadding;
     for (int FI : ObjectsToAllocate)
       MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
@@ -961,7 +983,7 @@

   int64_t RVVStackSize;
   Align RVVStackAlign;
-  std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MFI);
+  std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MF);

   RVFI->setRVVStackSize(RVVStackSize);
   RVFI->setRVVStackAlign(RVVStackAlign);
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -17,8 +17,8 @@
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 10
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    lui a0, %hi(.L__const._Z3foov.var_49)
 ; CHECK-NEXT:    addi a0, a0, %lo(.L__const._Z3foov.var_49)
@@ -83,8 +83,8 @@
 ; CHECK-NEXT:    addi a0, a0, %lo(var_47)
 ; CHECK-NEXT:    vsseg4e16.v v10, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    li a1, 10
-; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
@@ -34,7 +34,6 @@
 ; RV64IV-NEXT:    addi sp, sp, -544
 ; RV64IV-NEXT:    .cfi_def_cfa_offset 544
 ; RV64IV-NEXT:    csrr a0, vlenb
-; RV64IV-NEXT:    slli a0, a0, 1
 ; RV64IV-NEXT:    sub sp, sp, a0
 ; RV64IV-NEXT:    addi a0, sp, 24
 ; RV64IV-NEXT:    vl1re64.v v8, (a0)
@@ -44,7 +43,6 @@
 ; RV64IV-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; RV64IV-NEXT:    vadd.vv v8, v8, v9
 ; RV64IV-NEXT:    csrr a0, vlenb
-; RV64IV-NEXT:    slli a0, a0, 1
 ; RV64IV-NEXT:    add sp, sp, a0
 ; RV64IV-NEXT:    addi sp, sp, 544
 ; RV64IV-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -38,12 +38,10 @@
   ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0
   ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -240
   ; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB
-  ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 1
   ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
   ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 6 /* e64 */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8)
   ; CHECK-NEXT: $x11 = PseudoReadVLENB
-  ; CHECK-NEXT: $x11 = SLLI killed $x11, 1
   ; CHECK-NEXT: $x10 = LUI 1048575
   ; CHECK-NEXT: $x10 = ADDIW killed $x10, 1824
   ; CHECK-NEXT: $x10 = ADD $x8, killed $x10
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ 
b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -1,17 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s +; RUN: | FileCheck %s --check-prefixes=CHECK,NOZBA ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba -verify-machineinstrs < %s \ -; RUN: | FileCheck %s +; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA define void @lmul1() nounwind { ; CHECK-LABEL: lmul1: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: ret %v = alloca @@ -73,15 +71,27 @@ } define void @lmul1_and_2() nounwind { -; CHECK-LABEL: lmul1_and_2: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul1_and_2: +; NOZBA: # %bb.0: +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 +; NOZBA-NEXT: add sp, sp, a0 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul1_and_2: +; ZBA: # %bb.0: +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: add sp, sp, a0 +; ZBA-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -95,7 +105,8 @@ ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 ; CHECK-NEXT: addi sp, s0, -48 @@ -116,7 +127,8 @@ ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 ; CHECK-NEXT: addi sp, s0, -48 @@ -130,15 +142,27 @@ } define void @lmul2_and_1() nounwind { -; CHECK-LABEL: lmul2_and_1: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul2_and_1: +; NOZBA: # %bb.0: +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 +; NOZBA-NEXT: add sp, sp, a0 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul2_and_1: +; ZBA: # %bb.0: +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: add sp, sp, a0 +; ZBA-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -152,7 +176,8 @@ ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 ; CHECK-NEXT: addi sp, s0, -48 @@ -173,7 +198,8 @@ ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: 
slli a0, a0, 3 +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 ; CHECK-NEXT: addi sp, s0, -48 @@ -194,7 +220,8 @@ ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 ; CHECK-NEXT: addi sp, s0, -48 @@ -235,19 +262,35 @@ define void @gpr_and_lmul1_and_2() nounwind { -; CHECK-LABEL: gpr_and_lmul1_and_2: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: li a0, 3 -; CHECK-NEXT: sd a0, 8(sp) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret +; NOZBA-LABEL: gpr_and_lmul1_and_2: +; NOZBA: # %bb.0: +; NOZBA-NEXT: addi sp, sp, -16 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: li a0, 3 +; NOZBA-NEXT: sd a0, 8(sp) +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 +; NOZBA-NEXT: add sp, sp, a0 +; NOZBA-NEXT: addi sp, sp, 16 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: gpr_and_lmul1_and_2: +; ZBA: # %bb.0: +; ZBA-NEXT: addi sp, sp, -16 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: li a0, 3 +; ZBA-NEXT: sd a0, 8(sp) +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: add sp, sp, a0 +; ZBA-NEXT: addi sp, sp, 16 +; ZBA-NEXT: ret %x1 = alloca i64 %v1 = alloca %v2 = alloca @@ -263,7 +306,8 @@ ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 ; CHECK-NEXT: li a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -83,7 +83,7 @@ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -272 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB - ; CHECK-NEXT: $x11 = frame-setup ADDI killed $x0, 52 + ; CHECK-NEXT: $x11 = frame-setup ADDI killed $x0, 51 ; CHECK-NEXT: $x10 = frame-setup MUL killed $x10, killed $x11 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10 ; CHECK-NEXT: $x2 = frame-setup ANDI $x2, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1833,8 +1833,8 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, mu @@ -1882,8 +1882,8 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi 
sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll --- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll +++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll @@ -16,7 +16,6 @@ ; CHECK-NEXT: addi s0, sp, 96 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: andi sp, sp, -16 ; CHECK-NEXT: mv s1, sp diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll @@ -9,7 +9,6 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -18,7 +17,6 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -27,7 +25,6 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -36,7 +33,6 @@ ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -52,7 +48,6 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -61,7 +56,6 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -70,7 +64,6 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -79,7 +72,6 @@ ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -9,7 +9,6 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -21,7 +20,6 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; 
SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -64,7 +62,6 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -76,7 +73,6 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll @@ -9,7 +9,6 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -18,7 +17,6 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -27,7 +25,6 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -36,7 +33,6 @@ ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -9,7 +9,6 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -21,7 +20,6 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -64,7 +62,6 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -76,7 +73,6 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll --- a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll @@ -9,7 +9,8 @@ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 32 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: 
slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, 15 @@ -21,7 +22,8 @@ ; CHECK-NEXT: addi a2, a2, -32 ; CHECK-NEXT: vl1re64.v v8, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: sub a2, s0, a2 ; CHECK-NEXT: addi a2, a2, -32 ; CHECK-NEXT: vl2re64.v v8, (a2) @@ -54,12 +56,12 @@ ; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 128 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 1 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 112 ; CHECK-NEXT: vl1re64.v v8, (a0) @@ -92,7 +94,8 @@ ; CHECK-NEXT: sd s1, 120(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 144 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: mv s1, sp @@ -102,8 +105,7 @@ ; CHECK-NEXT: sub a0, sp, a0 ; CHECK-NEXT: mv sp, a0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 1 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: add a2, s1, a2 ; CHECK-NEXT: addi a2, a2, 112 ; CHECK-NEXT: vl1re64.v v8, (a2) diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir --- a/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir @@ -1,12 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py # RUN: llc -mtriple riscv32 -mattr=+zve64x -start-before=prologepilog -o - \ -# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV32 +# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV32-VLEN64 # RUN: llc -mtriple riscv32 -mattr=+v -start-before=prologepilog -o - \ -# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV32 +# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV32-VLEN128 # RUN: llc -mtriple riscv64 -mattr=+zve64x -start-before=prologepilog -o - \ -# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV64 +# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV64-VLEN64 # RUN: llc -mtriple riscv64 -mattr=+v -start-before=prologepilog -o - \ -# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV64 +# RUN: -verify-machineinstrs %s | FileCheck %s --check-prefix=RV64-VLEN128 --- | target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" target triple = "riscv64" @@ -14,41 +14,77 @@ declare void @extern(*) define void @rvv_stack_align8() #0 { - ; RV32-LABEL: rvv_stack_align8: - ; RV32: # %bb.0: - ; RV32-NEXT: addi sp, sp, -48 - ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill - ; RV32-NEXT: csrr a0, vlenb - ; RV32-NEXT: slli a0, a0, 1 - ; RV32-NEXT: sub sp, sp, a0 - ; RV32-NEXT: addi a0, sp, 32 - ; RV32-NEXT: addi a1, sp, 16 - ; RV32-NEXT: addi a2, sp, 8 - ; RV32-NEXT: call extern@plt - ; RV32-NEXT: csrr a0, vlenb - ; RV32-NEXT: slli a0, a0, 1 - ; RV32-NEXT: add sp, sp, a0 - ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload - ; RV32-NEXT: addi sp, sp, 48 - ; RV32-NEXT: ret + ; RV32-VLEN64-LABEL: rvv_stack_align8: + ; RV32-VLEN64: # %bb.0: + ; RV32-VLEN64-NEXT: addi sp, sp, -48 + ; RV32-VLEN64-NEXT: sw ra, 44(sp) # 
4-byte Folded Spill + ; RV32-VLEN64-NEXT: csrr a0, vlenb + ; RV32-VLEN64-NEXT: slli a0, a0, 1 + ; RV32-VLEN64-NEXT: sub sp, sp, a0 + ; RV32-VLEN64-NEXT: addi a0, sp, 32 + ; RV32-VLEN64-NEXT: addi a1, sp, 16 + ; RV32-VLEN64-NEXT: addi a2, sp, 8 + ; RV32-VLEN64-NEXT: call extern@plt + ; RV32-VLEN64-NEXT: csrr a0, vlenb + ; RV32-VLEN64-NEXT: slli a0, a0, 1 + ; RV32-VLEN64-NEXT: add sp, sp, a0 + ; RV32-VLEN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload + ; RV32-VLEN64-NEXT: addi sp, sp, 48 + ; RV32-VLEN64-NEXT: ret ; - ; RV64-LABEL: rvv_stack_align8: - ; RV64: # %bb.0: - ; RV64-NEXT: addi sp, sp, -48 - ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill - ; RV64-NEXT: csrr a0, vlenb - ; RV64-NEXT: slli a0, a0, 1 - ; RV64-NEXT: sub sp, sp, a0 - ; RV64-NEXT: addi a0, sp, 32 - ; RV64-NEXT: addi a1, sp, 16 - ; RV64-NEXT: addi a2, sp, 8 - ; RV64-NEXT: call extern@plt - ; RV64-NEXT: csrr a0, vlenb - ; RV64-NEXT: slli a0, a0, 1 - ; RV64-NEXT: add sp, sp, a0 - ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload - ; RV64-NEXT: addi sp, sp, 48 - ; RV64-NEXT: ret + ; RV32-VLEN128-LABEL: rvv_stack_align8: + ; RV32-VLEN128: # %bb.0: + ; RV32-VLEN128-NEXT: addi sp, sp, -48 + ; RV32-VLEN128-NEXT: sw ra, 44(sp) # 4-byte Folded Spill + ; RV32-VLEN128-NEXT: csrr a0, vlenb + ; RV32-VLEN128-NEXT: slli a0, a0, 1 + ; RV32-VLEN128-NEXT: sub sp, sp, a0 + ; RV32-VLEN128-NEXT: addi a0, sp, 32 + ; RV32-VLEN128-NEXT: addi a1, sp, 16 + ; RV32-VLEN128-NEXT: addi a2, sp, 8 + ; RV32-VLEN128-NEXT: call extern@plt + ; RV32-VLEN128-NEXT: csrr a0, vlenb + ; RV32-VLEN128-NEXT: slli a0, a0, 1 + ; RV32-VLEN128-NEXT: add sp, sp, a0 + ; RV32-VLEN128-NEXT: lw ra, 44(sp) # 4-byte Folded Reload + ; RV32-VLEN128-NEXT: addi sp, sp, 48 + ; RV32-VLEN128-NEXT: ret + ; + ; RV64-VLEN64-LABEL: rvv_stack_align8: + ; RV64-VLEN64: # %bb.0: + ; RV64-VLEN64-NEXT: addi sp, sp, -48 + ; RV64-VLEN64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill + ; RV64-VLEN64-NEXT: csrr a0, vlenb + ; RV64-VLEN64-NEXT: slli a0, a0, 1 + ; RV64-VLEN64-NEXT: sub sp, sp, a0 + ; RV64-VLEN64-NEXT: addi a0, sp, 32 + ; RV64-VLEN64-NEXT: addi a1, sp, 16 + ; RV64-VLEN64-NEXT: addi a2, sp, 8 + ; RV64-VLEN64-NEXT: call extern@plt + ; RV64-VLEN64-NEXT: csrr a0, vlenb + ; RV64-VLEN64-NEXT: slli a0, a0, 1 + ; RV64-VLEN64-NEXT: add sp, sp, a0 + ; RV64-VLEN64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; RV64-VLEN64-NEXT: addi sp, sp, 48 + ; RV64-VLEN64-NEXT: ret + ; + ; RV64-VLEN128-LABEL: rvv_stack_align8: + ; RV64-VLEN128: # %bb.0: + ; RV64-VLEN128-NEXT: addi sp, sp, -48 + ; RV64-VLEN128-NEXT: sd ra, 40(sp) # 8-byte Folded Spill + ; RV64-VLEN128-NEXT: csrr a0, vlenb + ; RV64-VLEN128-NEXT: slli a0, a0, 1 + ; RV64-VLEN128-NEXT: sub sp, sp, a0 + ; RV64-VLEN128-NEXT: addi a0, sp, 32 + ; RV64-VLEN128-NEXT: addi a1, sp, 16 + ; RV64-VLEN128-NEXT: addi a2, sp, 8 + ; RV64-VLEN128-NEXT: call extern@plt + ; RV64-VLEN128-NEXT: csrr a0, vlenb + ; RV64-VLEN128-NEXT: slli a0, a0, 1 + ; RV64-VLEN128-NEXT: add sp, sp, a0 + ; RV64-VLEN128-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; RV64-VLEN128-NEXT: addi sp, sp, 48 + ; RV64-VLEN128-NEXT: ret %a = alloca , align 8 %b = alloca i64 %c = alloca i64 @@ -57,41 +93,77 @@ } define void @rvv_stack_align16() #0 { - ; RV32-LABEL: rvv_stack_align16: - ; RV32: # %bb.0: - ; RV32-NEXT: addi sp, sp, -48 - ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill - ; RV32-NEXT: csrr a0, vlenb - ; RV32-NEXT: slli a0, a0, 1 - ; RV32-NEXT: sub sp, sp, a0 - ; RV32-NEXT: addi a0, sp, 32 - ; RV32-NEXT: addi a1, sp, 16 - ; RV32-NEXT: addi a2, sp, 8 - ; RV32-NEXT: call extern@plt - ; 
RV32-NEXT: csrr a0, vlenb - ; RV32-NEXT: slli a0, a0, 1 - ; RV32-NEXT: add sp, sp, a0 - ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload - ; RV32-NEXT: addi sp, sp, 48 - ; RV32-NEXT: ret + ; RV32-VLEN64-LABEL: rvv_stack_align16: + ; RV32-VLEN64: # %bb.0: + ; RV32-VLEN64-NEXT: addi sp, sp, -48 + ; RV32-VLEN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill + ; RV32-VLEN64-NEXT: csrr a0, vlenb + ; RV32-VLEN64-NEXT: slli a0, a0, 1 + ; RV32-VLEN64-NEXT: sub sp, sp, a0 + ; RV32-VLEN64-NEXT: addi a0, sp, 32 + ; RV32-VLEN64-NEXT: addi a1, sp, 16 + ; RV32-VLEN64-NEXT: addi a2, sp, 8 + ; RV32-VLEN64-NEXT: call extern@plt + ; RV32-VLEN64-NEXT: csrr a0, vlenb + ; RV32-VLEN64-NEXT: slli a0, a0, 1 + ; RV32-VLEN64-NEXT: add sp, sp, a0 + ; RV32-VLEN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload + ; RV32-VLEN64-NEXT: addi sp, sp, 48 + ; RV32-VLEN64-NEXT: ret + ; + ; RV32-VLEN128-LABEL: rvv_stack_align16: + ; RV32-VLEN128: # %bb.0: + ; RV32-VLEN128-NEXT: addi sp, sp, -48 + ; RV32-VLEN128-NEXT: sw ra, 44(sp) # 4-byte Folded Spill + ; RV32-VLEN128-NEXT: csrr a0, vlenb + ; RV32-VLEN128-NEXT: slli a0, a0, 1 + ; RV32-VLEN128-NEXT: sub sp, sp, a0 + ; RV32-VLEN128-NEXT: addi a0, sp, 32 + ; RV32-VLEN128-NEXT: addi a1, sp, 16 + ; RV32-VLEN128-NEXT: addi a2, sp, 8 + ; RV32-VLEN128-NEXT: call extern@plt + ; RV32-VLEN128-NEXT: csrr a0, vlenb + ; RV32-VLEN128-NEXT: slli a0, a0, 1 + ; RV32-VLEN128-NEXT: add sp, sp, a0 + ; RV32-VLEN128-NEXT: lw ra, 44(sp) # 4-byte Folded Reload + ; RV32-VLEN128-NEXT: addi sp, sp, 48 + ; RV32-VLEN128-NEXT: ret ; - ; RV64-LABEL: rvv_stack_align16: - ; RV64: # %bb.0: - ; RV64-NEXT: addi sp, sp, -48 - ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill - ; RV64-NEXT: csrr a0, vlenb - ; RV64-NEXT: slli a0, a0, 1 - ; RV64-NEXT: sub sp, sp, a0 - ; RV64-NEXT: addi a0, sp, 32 - ; RV64-NEXT: addi a1, sp, 16 - ; RV64-NEXT: addi a2, sp, 8 - ; RV64-NEXT: call extern@plt - ; RV64-NEXT: csrr a0, vlenb - ; RV64-NEXT: slli a0, a0, 1 - ; RV64-NEXT: add sp, sp, a0 - ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload - ; RV64-NEXT: addi sp, sp, 48 - ; RV64-NEXT: ret + ; RV64-VLEN64-LABEL: rvv_stack_align16: + ; RV64-VLEN64: # %bb.0: + ; RV64-VLEN64-NEXT: addi sp, sp, -48 + ; RV64-VLEN64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill + ; RV64-VLEN64-NEXT: csrr a0, vlenb + ; RV64-VLEN64-NEXT: slli a0, a0, 1 + ; RV64-VLEN64-NEXT: sub sp, sp, a0 + ; RV64-VLEN64-NEXT: addi a0, sp, 32 + ; RV64-VLEN64-NEXT: addi a1, sp, 16 + ; RV64-VLEN64-NEXT: addi a2, sp, 8 + ; RV64-VLEN64-NEXT: call extern@plt + ; RV64-VLEN64-NEXT: csrr a0, vlenb + ; RV64-VLEN64-NEXT: slli a0, a0, 1 + ; RV64-VLEN64-NEXT: add sp, sp, a0 + ; RV64-VLEN64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; RV64-VLEN64-NEXT: addi sp, sp, 48 + ; RV64-VLEN64-NEXT: ret + ; + ; RV64-VLEN128-LABEL: rvv_stack_align16: + ; RV64-VLEN128: # %bb.0: + ; RV64-VLEN128-NEXT: addi sp, sp, -48 + ; RV64-VLEN128-NEXT: sd ra, 40(sp) # 8-byte Folded Spill + ; RV64-VLEN128-NEXT: csrr a0, vlenb + ; RV64-VLEN128-NEXT: slli a0, a0, 1 + ; RV64-VLEN128-NEXT: sub sp, sp, a0 + ; RV64-VLEN128-NEXT: addi a0, sp, 32 + ; RV64-VLEN128-NEXT: addi a1, sp, 16 + ; RV64-VLEN128-NEXT: addi a2, sp, 8 + ; RV64-VLEN128-NEXT: call extern@plt + ; RV64-VLEN128-NEXT: csrr a0, vlenb + ; RV64-VLEN128-NEXT: slli a0, a0, 1 + ; RV64-VLEN128-NEXT: add sp, sp, a0 + ; RV64-VLEN128-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; RV64-VLEN128-NEXT: addi sp, sp, 48 + ; RV64-VLEN128-NEXT: ret %a = alloca , align 16 %b = alloca i64 %c = alloca i64 @@ -100,45 +172,85 @@ } define void @rvv_stack_align32() #0 { - ; RV32-LABEL: 
rvv_stack_align32: - ; RV32: # %bb.0: - ; RV32-NEXT: addi sp, sp, -48 - ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill - ; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill - ; RV32-NEXT: addi s0, sp, 48 - ; RV32-NEXT: csrr a0, vlenb - ; RV32-NEXT: slli a0, a0, 2 - ; RV32-NEXT: sub sp, sp, a0 - ; RV32-NEXT: andi sp, sp, -32 - ; RV32-NEXT: addi a0, sp, 32 - ; RV32-NEXT: addi a1, sp, 16 - ; RV32-NEXT: addi a2, sp, 8 - ; RV32-NEXT: call extern@plt - ; RV32-NEXT: addi sp, s0, -48 - ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload - ; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload - ; RV32-NEXT: addi sp, sp, 48 - ; RV32-NEXT: ret + ; RV32-VLEN64-LABEL: rvv_stack_align32: + ; RV32-VLEN64: # %bb.0: + ; RV32-VLEN64-NEXT: addi sp, sp, -48 + ; RV32-VLEN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill + ; RV32-VLEN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill + ; RV32-VLEN64-NEXT: addi s0, sp, 48 + ; RV32-VLEN64-NEXT: csrr a0, vlenb + ; RV32-VLEN64-NEXT: slli a0, a0, 2 + ; RV32-VLEN64-NEXT: sub sp, sp, a0 + ; RV32-VLEN64-NEXT: andi sp, sp, -32 + ; RV32-VLEN64-NEXT: addi a0, sp, 32 + ; RV32-VLEN64-NEXT: addi a1, sp, 16 + ; RV32-VLEN64-NEXT: addi a2, sp, 8 + ; RV32-VLEN64-NEXT: call extern@plt + ; RV32-VLEN64-NEXT: addi sp, s0, -48 + ; RV32-VLEN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload + ; RV32-VLEN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload + ; RV32-VLEN64-NEXT: addi sp, sp, 48 + ; RV32-VLEN64-NEXT: ret + ; + ; RV32-VLEN128-LABEL: rvv_stack_align32: + ; RV32-VLEN128: # %bb.0: + ; RV32-VLEN128-NEXT: addi sp, sp, -48 + ; RV32-VLEN128-NEXT: sw ra, 44(sp) # 4-byte Folded Spill + ; RV32-VLEN128-NEXT: sw s0, 40(sp) # 4-byte Folded Spill + ; RV32-VLEN128-NEXT: addi s0, sp, 48 + ; RV32-VLEN128-NEXT: csrr a0, vlenb + ; RV32-VLEN128-NEXT: slli a0, a0, 1 + ; RV32-VLEN128-NEXT: sub sp, sp, a0 + ; RV32-VLEN128-NEXT: andi sp, sp, -32 + ; RV32-VLEN128-NEXT: addi a0, sp, 32 + ; RV32-VLEN128-NEXT: addi a1, sp, 16 + ; RV32-VLEN128-NEXT: addi a2, sp, 8 + ; RV32-VLEN128-NEXT: call extern@plt + ; RV32-VLEN128-NEXT: addi sp, s0, -48 + ; RV32-VLEN128-NEXT: lw ra, 44(sp) # 4-byte Folded Reload + ; RV32-VLEN128-NEXT: lw s0, 40(sp) # 4-byte Folded Reload + ; RV32-VLEN128-NEXT: addi sp, sp, 48 + ; RV32-VLEN128-NEXT: ret + ; + ; RV64-VLEN64-LABEL: rvv_stack_align32: + ; RV64-VLEN64: # %bb.0: + ; RV64-VLEN64-NEXT: addi sp, sp, -48 + ; RV64-VLEN64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill + ; RV64-VLEN64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill + ; RV64-VLEN64-NEXT: addi s0, sp, 48 + ; RV64-VLEN64-NEXT: csrr a0, vlenb + ; RV64-VLEN64-NEXT: slli a0, a0, 2 + ; RV64-VLEN64-NEXT: sub sp, sp, a0 + ; RV64-VLEN64-NEXT: andi sp, sp, -32 + ; RV64-VLEN64-NEXT: addi a0, sp, 32 + ; RV64-VLEN64-NEXT: addi a1, sp, 8 + ; RV64-VLEN64-NEXT: mv a2, sp + ; RV64-VLEN64-NEXT: call extern@plt + ; RV64-VLEN64-NEXT: addi sp, s0, -48 + ; RV64-VLEN64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; RV64-VLEN64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload + ; RV64-VLEN64-NEXT: addi sp, sp, 48 + ; RV64-VLEN64-NEXT: ret ; - ; RV64-LABEL: rvv_stack_align32: - ; RV64: # %bb.0: - ; RV64-NEXT: addi sp, sp, -48 - ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill - ; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill - ; RV64-NEXT: addi s0, sp, 48 - ; RV64-NEXT: csrr a0, vlenb - ; RV64-NEXT: slli a0, a0, 2 - ; RV64-NEXT: sub sp, sp, a0 - ; RV64-NEXT: andi sp, sp, -32 - ; RV64-NEXT: addi a0, sp, 32 - ; RV64-NEXT: addi a1, sp, 8 - ; RV64-NEXT: mv a2, sp - ; RV64-NEXT: call extern@plt - ; RV64-NEXT: addi sp, s0, -48 - ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload - ; 
RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload - ; RV64-NEXT: addi sp, sp, 48 - ; RV64-NEXT: ret + ; RV64-VLEN128-LABEL: rvv_stack_align32: + ; RV64-VLEN128: # %bb.0: + ; RV64-VLEN128-NEXT: addi sp, sp, -48 + ; RV64-VLEN128-NEXT: sd ra, 40(sp) # 8-byte Folded Spill + ; RV64-VLEN128-NEXT: sd s0, 32(sp) # 8-byte Folded Spill + ; RV64-VLEN128-NEXT: addi s0, sp, 48 + ; RV64-VLEN128-NEXT: csrr a0, vlenb + ; RV64-VLEN128-NEXT: slli a0, a0, 1 + ; RV64-VLEN128-NEXT: sub sp, sp, a0 + ; RV64-VLEN128-NEXT: andi sp, sp, -32 + ; RV64-VLEN128-NEXT: addi a0, sp, 32 + ; RV64-VLEN128-NEXT: addi a1, sp, 8 + ; RV64-VLEN128-NEXT: mv a2, sp + ; RV64-VLEN128-NEXT: call extern@plt + ; RV64-VLEN128-NEXT: addi sp, s0, -48 + ; RV64-VLEN128-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; RV64-VLEN128-NEXT: ld s0, 32(sp) # 8-byte Folded Reload + ; RV64-VLEN128-NEXT: addi sp, sp, 48 + ; RV64-VLEN128-NEXT: ret %a = alloca , align 32 %b = alloca i64 %c = alloca i64 diff --git a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll --- a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll +++ b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll @@ -1,49 +1,77 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: | FileCheck %s --check-prefix=RV32-VLEN64 ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV64 +; RUN: | FileCheck %s --check-prefix=RV64-VLEN64 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: | FileCheck %s --check-prefix=RV32-VLEN128 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV64 - -; FIXME: We are over-aligning the stack on V, wasting stack space. 
+; RUN: | FileCheck %s --check-prefix=RV64-VLEN128 define i64* @scalar_stack_align16() nounwind { -; RV32-LABEL: scalar_stack_align16: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: call extern@plt -; RV32-NEXT: mv a0, sp -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: add sp, sp, a1 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret +; RV32-VLEN64-LABEL: scalar_stack_align16: +; RV32-VLEN64: # %bb.0: +; RV32-VLEN64-NEXT: addi sp, sp, -32 +; RV32-VLEN64-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-VLEN64-NEXT: csrr a0, vlenb +; RV32-VLEN64-NEXT: slli a0, a0, 1 +; RV32-VLEN64-NEXT: sub sp, sp, a0 +; RV32-VLEN64-NEXT: addi a0, sp, 16 +; RV32-VLEN64-NEXT: call extern@plt +; RV32-VLEN64-NEXT: mv a0, sp +; RV32-VLEN64-NEXT: csrr a1, vlenb +; RV32-VLEN64-NEXT: slli a1, a1, 1 +; RV32-VLEN64-NEXT: add sp, sp, a1 +; RV32-VLEN64-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-VLEN64-NEXT: addi sp, sp, 32 +; RV32-VLEN64-NEXT: ret +; +; RV64-VLEN64-LABEL: scalar_stack_align16: +; RV64-VLEN64: # %bb.0: +; RV64-VLEN64-NEXT: addi sp, sp, -32 +; RV64-VLEN64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-VLEN64-NEXT: csrr a0, vlenb +; RV64-VLEN64-NEXT: slli a0, a0, 1 +; RV64-VLEN64-NEXT: sub sp, sp, a0 +; RV64-VLEN64-NEXT: addi a0, sp, 16 +; RV64-VLEN64-NEXT: call extern@plt +; RV64-VLEN64-NEXT: mv a0, sp +; RV64-VLEN64-NEXT: csrr a1, vlenb +; RV64-VLEN64-NEXT: slli a1, a1, 1 +; RV64-VLEN64-NEXT: add sp, sp, a1 +; RV64-VLEN64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64-VLEN64-NEXT: addi sp, sp, 32 +; RV64-VLEN64-NEXT: ret +; +; RV32-VLEN128-LABEL: scalar_stack_align16: +; RV32-VLEN128: # %bb.0: +; RV32-VLEN128-NEXT: addi sp, sp, -32 +; RV32-VLEN128-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-VLEN128-NEXT: csrr a0, vlenb +; RV32-VLEN128-NEXT: sub sp, sp, a0 +; RV32-VLEN128-NEXT: addi a0, sp, 16 +; RV32-VLEN128-NEXT: call extern@plt +; RV32-VLEN128-NEXT: mv a0, sp +; RV32-VLEN128-NEXT: csrr a1, vlenb +; RV32-VLEN128-NEXT: add sp, sp, a1 +; RV32-VLEN128-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-VLEN128-NEXT: addi sp, sp, 32 +; RV32-VLEN128-NEXT: ret ; -; RV64-LABEL: scalar_stack_align16: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: call extern@plt -; RV64-NEXT: mv a0, sp -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add sp, sp, a1 -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 -; RV64-NEXT: ret +; RV64-VLEN128-LABEL: scalar_stack_align16: +; RV64-VLEN128: # %bb.0: +; RV64-VLEN128-NEXT: addi sp, sp, -32 +; RV64-VLEN128-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-VLEN128-NEXT: csrr a0, vlenb +; RV64-VLEN128-NEXT: sub sp, sp, a0 +; RV64-VLEN128-NEXT: addi a0, sp, 16 +; RV64-VLEN128-NEXT: call extern@plt +; RV64-VLEN128-NEXT: mv a0, sp +; RV64-VLEN128-NEXT: csrr a1, vlenb +; RV64-VLEN128-NEXT: add sp, sp, a1 +; RV64-VLEN128-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64-VLEN128-NEXT: addi sp, sp, 32 +; RV64-VLEN128-NEXT: ret %a = alloca %c = alloca i64, align 16 call void @extern(* %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir 
b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir @@ -124,7 +124,7 @@ ; CHECK-NEXT: - { id: 0, name: buf1, type: default, offset: -48, size: 1, alignment: 8, ; CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, ; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - ; CHECK-NEXT: - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + ; CHECK-NEXT: - { id: 1, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true, ; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } ; CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8, @@ -155,16 +155,13 @@ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x8, -32 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x9, -40 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB - ; CHECK-NEXT: $x10 = frame-setup SLLI killed $x10, 1 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10 ; CHECK-NEXT: renamable $x8 = COPY $x14 ; CHECK-NEXT: renamable $x9 = COPY $x11 ; CHECK-NEXT: $x10 = PseudoReadVLENB - ; CHECK-NEXT: $x10 = SLLI killed $x10, 1 ; CHECK-NEXT: $x10 = ADD $x2, killed $x10 ; CHECK-NEXT: SD killed renamable $x17, killed $x10, 72 :: (store (s64)) ; CHECK-NEXT: $x10 = PseudoReadVLENB - ; CHECK-NEXT: $x10 = SLLI killed $x10, 1 ; CHECK-NEXT: $x10 = ADD $x2, killed $x10 ; CHECK-NEXT: SD killed renamable $x16, killed $x10, 64 :: (store (s64) into %fixed-stack.1, align 16) ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype