diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -30,6 +30,8 @@ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + uint64_t getStackSizeWithRVVPadding(const MachineFunction &MF) const; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override; @@ -79,7 +81,8 @@ void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount, MachineInstr::MIFlag Flag) const; - int64_t assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const; + std::pair<uint64_t, Align> + assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const; }; } #endif diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -250,6 +250,7 @@ // Determines the size of the frame and maximum call frame size. void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); // Get the number of bytes to allocate from the FrameInfo. uint64_t FrameSize = MFI.getStackSize(); @@ -262,6 +263,28 @@ // Update frame info. MFI.setStackSize(FrameSize); + + // When using SP or BP to access stack objects, we may require extra padding + // to ensure the bottom of the RVV stack is correctly aligned within the main + // stack. We calculate this as the amount required to align the scalar local + // variable section up to the RVV alignment. + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + if (RVFI->getRVVStackSize() && (!hasFP(MF) || TRI->hasStackRealignment(MF))) { + int ScalarLocalVarSize = FrameSize - RVFI->getCalleeSavedStackSize() - + RVFI->getVarArgsSaveSize(); + if (auto RVVPadding = + offsetToAlignment(ScalarLocalVarSize, RVFI->getRVVStackAlign())) + RVFI->setRVVPadding(RVVPadding); + } +} + +// Returns the stack size including RVV padding (when required), rounded back +// up to the required stack alignment. +uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding( + const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); + return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign()); } void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, @@ -401,7 +424,7 @@ // FIXME (note copied from Lanai): This appears to be overallocating. Needs // investigation. Get the number of bytes to allocate from the FrameInfo. - uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding(); + uint64_t StackSize = getStackSizeWithRVVPadding(MF); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); @@ -482,7 +505,8 @@ // Emit the second SP adjustment after saving callee saved registers. 
if (FirstSPAdjustAmount) { - uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + uint64_t SecondSPAdjustAmount = + getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount; assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount, @@ -492,8 +516,8 @@ // don't emit an sp-based .cfi_def_cfa_offset if (!hasFP(MF)) { // Emit ".cfi_def_cfa_offset StackSize" - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset( + nullptr, getStackSizeWithRVVPadding(MF))); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); @@ -583,7 +607,7 @@ if (!CSI.empty()) LastFrameDestroy = std::prev(MBBI, CSI.size()); - uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding(); + uint64_t StackSize = getStackSizeWithRVVPadding(MF); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); @@ -603,7 +627,8 @@ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); if (FirstSPAdjustAmount) { - uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + uint64_t SecondSPAdjustAmount = + getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount; assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); @@ -661,8 +686,7 @@ if (FirstSPAdjustAmount) Offset += StackOffset::getFixed(FirstSPAdjustAmount); else - Offset += - StackOffset::getFixed(MFI.getStackSize() + RVFI->getRVVPadding()); + Offset += StackOffset::getFixed(getStackSizeWithRVVPadding(MF)); } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { // If the stack was realigned, the frame pointer is set in order to allow // SP to be restored, so we need another base register to record the stack @@ -678,18 +702,21 @@ // | realignment (the size of | | | // | this area is not counted | | | // | in MFI.getStackSize()) | | | - // |--------------------------| -- | - // | Padding after RVV | | | - // | (not counted in | | | - // | MFI.getStackSize()) | | | // |--------------------------| -- |-- MFI.getStackSize() + // | RVV alignment padding | | | + // | (not counted in | | | + // | MFI.getStackSize() but | | | + // | counted in | | | + // | RVFI.getRVVStackSize()) | | | + // |--------------------------| -- | // | RVV objects | | | // | (not counted in | | | // | MFI.getStackSize()) | | | // |--------------------------| -- | - // | Padding before RVV | | | + // | padding before RVV | | | // | (not counted in | | | - // | MFI.getStackSize()) | | | + // | MFI.getStackSize() or in | | | + // | RVFI.getRVVStackSize()) | | | // |--------------------------| -- | // | scalar local variables | | <----' // |--------------------------| -- <-- BP @@ -707,32 +734,38 @@ // | this area is not counted | | | // | in MFI.getStackSize()) | | | // |--------------------------| -- | - // | Padding after RVV | | | + // | RVV alignment padding | | | // | (not counted in | | | - // | MFI.getStackSize()) | | | + // | MFI.getStackSize() but | | | + // | counted in | | | + // | RVFI.getRVVStackSize()) | | | // |--------------------------| -- |-- MFI.getStackSize() // | RVV objects | | | // | (not counted in | | | // | MFI.getStackSize()) | | | // |--------------------------| -- | - // | Padding before RVV | | | + // 
| padding before RVV | | | // | (not counted in | | | - // | MFI.getStackSize()) | | | + // | MFI.getStackSize() or in | | | + // | RVFI.getRVVStackSize()) | | | // |--------------------------| -- | // | scalar local variables | | <----' // |--------------------------| -- <-- SP } // The total amount of padding surrounding RVV objects is described by // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV - // objects to 8 bytes. + // objects to the required alignment. if (MFI.getStackID(FI) == TargetStackID::Default) { Offset += StackOffset::getFixed(MFI.getStackSize()); if (FI < 0) Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - Offset += StackOffset::get( - alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8), - RVFI->getRVVStackSize()); + // Ensure the base of the RVV stack is correctly aligned: add on the + // alignment padding. + int ScalarLocalVarSize = MFI.getStackSize() - + RVFI->getCalleeSavedStackSize() + + RVFI->getRVVPadding(); + Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize()); } } else { FrameReg = RI->getFrameRegister(MF); @@ -755,8 +788,14 @@ // |--------------------------| // | VarSize objects | // |--------------------------| <-- SP - if (MFI.getStackID(FI) == TargetStackID::ScalableVector) + if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { + // We don't expect any extra RVV alignment padding, as the stack size + // and RVV object sections should be correct aligned in their own + // right. + assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) && + "Inconsistent stack layout"); Offset -= StackOffset::getFixed(MFI.getStackSize()); + } } else { // When using SP to access frame objects, we need to add RVV stack size. // @@ -766,15 +805,17 @@ // |--------------------------| | | // | callee-saved registers | | | // |--------------------------| -- | - // | Padding after RVV | | | + // | RVV alignment padding | | | // | (not counted in | | | - // | MFI.getStackSize()) | | | + // | MFI.getStackSize() but | | | + // | counted in | | | + // | RVFI.getRVVStackSize()) | | | // |--------------------------| -- | // | RVV objects | | |-- MFI.getStackSize() // | (not counted in | | | // | MFI.getStackSize()) | | | // |--------------------------| -- | - // | Padding before RVV | | | + // | padding before RVV | | | // | (not counted in | | | // | MFI.getStackSize()) | | | // |--------------------------| -- | @@ -783,23 +824,22 @@ // // The total amount of padding surrounding RVV objects is described by // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV - // objects to 8 bytes. + // objects to the required alignment. if (MFI.getStackID(FI) == TargetStackID::Default) { if (MFI.isFixedObjectIndex(FI)) { - Offset += - StackOffset::get(MFI.getStackSize() + RVFI->getRVVPadding() + - RVFI->getLibCallStackSize(), - RVFI->getRVVStackSize()); + Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) + + RVFI->getLibCallStackSize(), + RVFI->getRVVStackSize()); } else { Offset += StackOffset::getFixed(MFI.getStackSize()); } } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - int ScalarLocalVarSize = MFI.getStackSize() - - RVFI->getCalleeSavedStackSize() - - RVFI->getVarArgsSaveSize(); - Offset += StackOffset::get( - alignTo(ScalarLocalVarSize, 8), - RVFI->getRVVStackSize()); + // Ensure the base of the RVV stack is correctly aligned: add on the + // alignment padding. 
+ int ScalarLocalVarSize = + MFI.getStackSize() - RVFI->getCalleeSavedStackSize() - + RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding(); + Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize()); } } } @@ -852,7 +892,7 @@ } } -int64_t +std::pair<uint64_t, Align> RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const { // Create a buffer of RVV objects to allocate. SmallVector<int, 8> ObjectsToAllocate; @@ -868,19 +908,33 @@ // Allocate all RVV locals and spills int64_t Offset = 0; + // The minimum alignment is 16 bytes. + Align RVVStackAlign(16); for (int FI : ObjectsToAllocate) { // ObjectSize in bytes. int64_t ObjectSize = MFI.getObjectSize(FI); + auto ObjectAlign = std::max(Align(8), MFI.getObjectAlign(FI)); // If the data type is the fractional vector type, reserve one vector // register for it. if (ObjectSize < 8) ObjectSize = 8; - // Currently, all scalable vector types are aligned to 8 bytes. - Offset = alignTo(Offset + ObjectSize, 8); + Offset = alignTo(Offset + ObjectSize, ObjectAlign); MFI.setObjectOffset(FI, -Offset); + // Update the maximum alignment of the RVV stack section + RVVStackAlign = std::max(RVVStackAlign, ObjectAlign); } - return Offset; + // Ensure the alignment of the RVV stack. Since we want the most-aligned + // object right at the bottom (i.e., any padding at the top of the frame), + // readjust all RVV objects down by the alignment padding. + uint64_t StackSize = Offset; + if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) { + StackSize += AlignmentPadding; + for (int FI : ObjectsToAllocate) + MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding); + } + + return std::make_pair(StackSize, RVVStackAlign); } static bool hasRVVSpillWithFIs(MachineFunction &MF, const RISCVInstrInfo &TII) { @@ -901,8 +955,13 @@ const TargetRegisterClass *RC = &RISCV::GPRRegClass; auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); - int64_t RVVStackSize = assignRVVStackObjectOffsets(MFI); + int64_t RVVStackSize; + Align RVVStackAlign; + std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MFI); + RVFI->setRVVStackSize(RVVStackSize); + RVFI->setRVVStackAlign(RVVStackAlign); + const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo(); // estimateStackSize has been observed to under-estimate the final stack @@ -941,16 +1000,6 @@ Size += MFI.getObjectSize(FrameIdx); } RVFI->setCalleeSavedStackSize(Size); - - // Padding required to keep the RVV stack aligned to 8 bytes within the main - // stack. We only need this when using SP or BP to access stack objects. - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - if (RVVStackSize && (!hasFP(MF) || TRI->hasStackRealignment(MF)) && - Size % 8 != 0) { - // Because we add the padding to the size of the stack, adding - // getStackAlign() will keep it aligned. - RVFI->setRVVPadding(getStackAlign().value()); - } } static bool hasRVVFrameObject(const MachineFunction &MF) { @@ -1025,7 +1074,7 @@ const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - uint64_t StackSize = MFI.getStackSize(); + uint64_t StackSize = getStackSizeWithRVVPadding(MF); // Disable SplitSPAdjust if save-restore libcall is used. 
The callee-saved // registers will be pushed by the save-restore libcalls, so we don't have to diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -57,6 +57,8 @@ unsigned LibCallStackSize = 0; /// Size of RVV stack. uint64_t RVVStackSize = 0; + /// Alignment of RVV stack. + Align RVVStackAlign; /// Padding required to keep RVV stack aligned within the main stack. uint64_t RVVPadding = 0; /// Size of stack frame to save callee saved registers @@ -92,6 +94,9 @@ uint64_t getRVVStackSize() const { return RVVStackSize; } void setRVVStackSize(uint64_t Size) { RVVStackSize = Size; } + Align getRVVStackAlign() const { return RVVStackAlign; } + void setRVVStackAlign(Align StackAlign) { RVVStackAlign = StackAlign; } + uint64_t getRVVPadding() const { return RVVPadding; } void setRVVPadding(uint64_t Padding) { RVVPadding = Padding; } diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll --- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll @@ -34,6 +34,7 @@ ; RV64IV-NEXT: addi sp, sp, -544 ; RV64IV-NEXT: .cfi_def_cfa_offset 544 ; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: sub sp, sp, a0 ; RV64IV-NEXT: addi a0, sp, 24 ; RV64IV-NEXT: vl1re64.v v8, (a0) @@ -43,6 +44,7 @@ ; RV64IV-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; RV64IV-NEXT: vadd.vv v8, v8, v9 ; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add sp, sp, a0 ; RV64IV-NEXT: addi sp, sp, 544 ; RV64IV-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -38,16 +38,19 @@ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -240 ; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB + ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 1 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12 ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 6 /* e64 */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) ; CHECK-NEXT: $x11 = PseudoReadVLENB + ; CHECK-NEXT: $x11 = SLLI killed $x11, 1 ; CHECK-NEXT: $x10 = LUI 1048575 ; CHECK-NEXT: $x10 = ADDIW killed $x10, 1824 ; CHECK-NEXT: $x10 = ADD $x8, killed $x10 ; CHECK-NEXT: $x10 = SUB killed $x10, killed $x11 ; CHECK-NEXT: VS1R_V killed renamable $v8, killed renamable $x10 ; CHECK-NEXT: $x10 = frame-destroy PseudoReadVLENB + ; CHECK-NEXT: $x10 = frame-destroy SLLI killed $x10, 1 ; CHECK-NEXT: $x2 = frame-destroy ADD $x2, killed $x10 ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 240 ; CHECK-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.3) diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll --- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -1,15 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < 
%s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,NOZBA +; RUN: | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA +; RUN: | FileCheck %s define void @lmul1() nounwind { ; CHECK-LABEL: lmul1: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: ret %v = alloca @@ -33,18 +35,18 @@ define void @lmul4() nounwind { ; CHECK-LABEL: lmul4: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v = alloca ret void @@ -53,45 +55,33 @@ define void @lmul8() nounwind { ; CHECK-LABEL: lmul8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %v = alloca ret void } define void @lmul1_and_2() nounwind { -; NOZBA-LABEL: lmul1_and_2: -; NOZBA: # %bb.0: -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: add sp, sp, a0 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul1_and_2: -; ZBA: # %bb.0: -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: add sp, sp, a0 -; ZBA-NEXT: ret +; CHECK-LABEL: lmul1_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -100,19 +90,18 @@ define void @lmul2_and_4() nounwind { ; CHECK-LABEL: lmul2_and_4: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; 
CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 6 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -120,103 +109,57 @@ } define void @lmul1_and_4() nounwind { -; NOZBA-LABEL: lmul1_and_4: -; NOZBA: # %bb.0: -; NOZBA-NEXT: addi sp, sp, -32 -; NOZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; NOZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; NOZBA-NEXT: addi s0, sp, 32 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 2 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: andi sp, sp, -32 -; NOZBA-NEXT: addi sp, s0, -32 -; NOZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; NOZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; NOZBA-NEXT: addi sp, sp, 32 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul1_and_4: -; ZBA: # %bb.0: -; ZBA-NEXT: addi sp, sp, -32 -; ZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; ZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; ZBA-NEXT: addi s0, sp, 32 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh2add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: andi sp, sp, -32 -; ZBA-NEXT: addi sp, s0, -32 -; ZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; ZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; ZBA-NEXT: addi sp, sp, 32 -; ZBA-NEXT: ret +; CHECK-LABEL: lmul1_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret %v1 = alloca %v2 = alloca ret void } define void @lmul2_and_1() nounwind { -; NOZBA-LABEL: lmul2_and_1: -; NOZBA: # %bb.0: -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: add sp, sp, a0 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul2_and_1: -; ZBA: # %bb.0: -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: add sp, sp, a0 -; ZBA-NEXT: ret +; CHECK-LABEL: lmul2_and_1: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret %v1 = alloca %v2 = alloca ret void } define void @lmul4_and_1() nounwind { -; NOZBA-LABEL: lmul4_and_1: -; NOZBA: # %bb.0: -; NOZBA-NEXT: addi sp, sp, -32 -; NOZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; NOZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; NOZBA-NEXT: addi s0, sp, 32 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 2 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: 
andi sp, sp, -32 -; NOZBA-NEXT: addi sp, s0, -32 -; NOZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; NOZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; NOZBA-NEXT: addi sp, sp, 32 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul4_and_1: -; ZBA: # %bb.0: -; ZBA-NEXT: addi sp, sp, -32 -; ZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; ZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; ZBA-NEXT: addi s0, sp, 32 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh2add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: andi sp, sp, -32 -; ZBA-NEXT: addi sp, s0, -32 -; ZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; ZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; ZBA-NEXT: addi sp, sp, 32 -; ZBA-NEXT: ret +; CHECK-LABEL: lmul4_and_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -225,19 +168,18 @@ define void @lmul4_and_2() nounwind { ; CHECK-LABEL: lmul4_and_2: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 6 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -247,19 +189,18 @@ define void @lmul4_and_2_x2_0() nounwind { ; CHECK-LABEL: lmul4_and_2_x2_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 12 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -271,19 +212,19 @@ define void @lmul4_and_2_x2_1() nounwind { ; CHECK-LABEL: lmul4_and_2_x2_1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: addi sp, sp, -48 
+; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 12 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret %v1 = alloca %v3 = alloca @@ -294,35 +235,19 @@ define void @gpr_and_lmul1_and_2() nounwind { -; NOZBA-LABEL: gpr_and_lmul1_and_2: -; NOZBA: # %bb.0: -; NOZBA-NEXT: addi sp, sp, -16 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: li a0, 3 -; NOZBA-NEXT: sd a0, 8(sp) -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 1 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: add sp, sp, a0 -; NOZBA-NEXT: addi sp, sp, 16 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: gpr_and_lmul1_and_2: -; ZBA: # %bb.0: -; ZBA-NEXT: addi sp, sp, -16 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: li a0, 3 -; ZBA-NEXT: sd a0, 8(sp) -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add a0, a0, a0 -; ZBA-NEXT: add sp, sp, a0 -; ZBA-NEXT: addi sp, sp, 16 -; ZBA-NEXT: ret +; CHECK-LABEL: gpr_and_lmul1_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: sd a0, 8(sp) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %x1 = alloca i64 %v1 = alloca %v2 = alloca @@ -331,42 +256,23 @@ } define void @gpr_and_lmul1_and_4() nounwind { -; NOZBA-LABEL: gpr_and_lmul1_and_4: -; NOZBA: # %bb.0: -; NOZBA-NEXT: addi sp, sp, -32 -; NOZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; NOZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; NOZBA-NEXT: addi s0, sp, 32 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a1, a0, 2 -; NOZBA-NEXT: add a0, a1, a0 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: andi sp, sp, -32 -; NOZBA-NEXT: li a0, 3 -; NOZBA-NEXT: sd a0, 8(sp) -; NOZBA-NEXT: addi sp, s0, -32 -; NOZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; NOZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; NOZBA-NEXT: addi sp, sp, 32 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: gpr_and_lmul1_and_4: -; ZBA: # %bb.0: -; ZBA-NEXT: addi sp, sp, -32 -; ZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; ZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; ZBA-NEXT: addi s0, sp, 32 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh2add a0, a0, a0 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: andi sp, sp, -32 -; ZBA-NEXT: li a0, 3 -; ZBA-NEXT: sd a0, 8(sp) -; ZBA-NEXT: addi sp, s0, -32 -; ZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; ZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; ZBA-NEXT: addi sp, sp, 32 -; ZBA-NEXT: ret +; CHECK-LABEL: gpr_and_lmul1_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: sd a0, 8(sp) +; 
CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret %x1 = alloca i64 %v1 = alloca %v2 = alloca @@ -377,19 +283,18 @@ define void @lmul_1_2_4_8() nounwind { ; CHECK-LABEL: lmul_1_2_4_8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -401,19 +306,18 @@ define void @lmul_1_2_4_8_x2_0() nounwind { ; CHECK-LABEL: lmul_1_2_4_8_x2_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 30 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -429,19 +333,18 @@ define void @lmul_1_2_4_8_x2_1() nounwind { ; CHECK-LABEL: lmul_1_2_4_8_x2_1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 30 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %v8 = alloca %v7 = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -286,9 +286,9 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( %x, %y, i32 %w) { ; 
RV32-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: addi sp, sp, -144 +; RV32-NEXT: .cfi_def_cfa_offset 144 +; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 @@ -298,12 +298,12 @@ ; RV32-NEXT: add a3, a0, a1 ; RV32-NEXT: vl8re32.v v24, (a3) ; RV32-NEXT: vl8re32.v v0, (a0) -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: vs8r.v v16, (a0) -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: li a3, 2 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 128 ; RV32-NEXT: vs8r.v v8, (a1) ; RV32-NEXT: vmv8r.v v8, v0 ; RV32-NEXT: vmv8r.v v16, v24 @@ -311,15 +311,15 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 144 ; RV32-NEXT: ret ; ; RV64-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: addi sp, sp, -144 +; RV64-NEXT: .cfi_def_cfa_offset 144 +; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 4 @@ -329,12 +329,12 @@ ; RV64-NEXT: add a3, a0, a1 ; RV64-NEXT: vl8re32.v v24, (a3) ; RV64-NEXT: vl8re32.v v0, (a0) -; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vs8r.v v16, (a0) -; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: li a3, 2 -; RV64-NEXT: addi a1, sp, 24 +; RV64-NEXT: addi a1, sp, 128 ; RV64-NEXT: vs8r.v v8, (a1) ; RV64-NEXT: vmv8r.v v8, v0 ; RV64-NEXT: vmv8r.v v16, v24 @@ -342,8 +342,8 @@ ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 144 ; RV64-NEXT: ret %t = call fastcc @ext2( %y, %x, i32 %w, i32 2) ret %t @@ -352,9 +352,9 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_i32( %x, %y, %z, i32 %w) { ; RV32-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: addi sp, sp, -144 +; RV32-NEXT: .cfi_def_cfa_offset 144 +; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 48 @@ -367,66 +367,66 @@ ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: addi a3, a3, 128 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: add a3, a0, a1 ; RV32-NEXT: vl8re32.v v24, (a3) ; RV32-NEXT: vl8re32.v v0, (a2) -; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: addi a2, sp, 128 ; RV32-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vl8re32.v v0, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: vs8r.v v16, (a0) ; RV32-NEXT: csrr 
a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: vs8r.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a2, a1, 16 +; RV32-NEXT: addi a2, a1, 128 ; RV32-NEXT: li a5, 42 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: addi a1, a1, 128 ; RV32-NEXT: vs8r.v v0, (a1) -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 128 ; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: addi a1, a1, 128 ; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: call ext3@plt ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 144 ; RV32-NEXT: ret ; ; RV64-LABEL: ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: addi sp, sp, -144 +; RV64-NEXT: .cfi_def_cfa_offset 144 +; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 48 @@ -439,59 +439,59 @@ ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 24 +; RV64-NEXT: addi a3, a3, 128 ; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV64-NEXT: add a3, a0, a1 ; RV64-NEXT: vl8re32.v v24, (a3) ; RV64-NEXT: vl8re32.v v0, (a2) -; RV64-NEXT: addi a2, sp, 24 +; RV64-NEXT: addi a2, sp, 128 ; RV64-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vl8re32.v v0, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 24 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vs8r.v v16, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 5 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 24 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vs8r.v v24, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 24 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 5 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 24 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a2, a1, 24 +; RV64-NEXT: addi a2, a1, 128 ; RV64-NEXT: li a5, 42 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 24 +; RV64-NEXT: addi a1, a1, 128 ; RV64-NEXT: vs8r.v v0, (a1) -; RV64-NEXT: addi a1, sp, 24 +; RV64-NEXT: addi a1, sp, 128 ; RV64-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload ; 
RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 24 +; RV64-NEXT: addi a1, a1, 128 ; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: call ext3@plt ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 48 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 144 ; RV64-NEXT: ret %t = call fastcc @ext3( %z, %y, %x, i32 %w, i32 42) ret %t @@ -520,16 +520,16 @@ define fastcc @pass_vector_arg_indirect_stack( %x, %y, %z) { ; RV32-LABEL: pass_vector_arg_indirect_stack: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: addi sp, sp, -144 +; RV32-NEXT: .cfi_def_cfa_offset 144 +; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 128 ; RV32-NEXT: add a1, a1, a0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, mu ; RV32-NEXT: vmv.v.i v8, 0 @@ -537,10 +537,10 @@ ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: addi a1, a1, 128 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: vs8r.v v8, (a0) -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: li a2, 2 @@ -552,13 +552,13 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi t2, a0, 16 -; RV32-NEXT: addi t4, sp, 16 +; RV32-NEXT: addi t2, a0, 128 +; RV32-NEXT: addi t4, sp, 128 ; RV32-NEXT: li t6, 8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: li a0, 0 ; RV32-NEXT: vmv.v.i v16, 0 @@ -566,22 +566,22 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 144 ; RV32-NEXT: ret ; ; RV64-LABEL: pass_vector_arg_indirect_stack: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: addi sp, sp, -144 +; RV64-NEXT: .cfi_def_cfa_offset 144 +; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 5 ; RV64-NEXT: sub sp, sp, a0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: addi a1, sp, 24 +; RV64-NEXT: addi a1, sp, 128 ; RV64-NEXT: add a1, a1, a0 ; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, mu ; RV64-NEXT: vmv.v.i v8, 0 @@ -589,10 +589,10 @@ ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 24 +; RV64-NEXT: addi a1, a1, 128 ; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: vs8r.v v8, (a0) -; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: li a1, 1 ; RV64-NEXT: li a2, 2 @@ -604,13 +604,13 @@ ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi t2, a0, 24 -; 
RV64-NEXT: addi t4, sp, 24 +; RV64-NEXT: addi t2, a0, 128 +; RV64-NEXT: addi t4, sp, 128 ; RV64-NEXT: li t6, 8 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 24 +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: li a0, 0 ; RV64-NEXT: vmv.v.i v16, 0 @@ -618,8 +618,8 @@ ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 5 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 144 ; RV64-NEXT: ret %s = call fastcc @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, zeroinitializer, zeroinitializer, zeroinitializer, i32 8) ret %s diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll @@ -23,58 +23,58 @@ define @caller_scalable_vector_split_indirect( %x) { ; RV32-LABEL: caller_scalable_vector_split_indirect: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: addi sp, sp, -144 +; RV32-NEXT: .cfi_def_cfa_offset 144 +; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: addi a1, sp, 128 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: vs8r.v v16, (a0) -; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: call callee_scalable_vector_split_indirect@plt ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 144 ; RV32-NEXT: ret ; ; RV64-LABEL: caller_scalable_vector_split_indirect: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 -; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: addi sp, sp, -144 +; RV64-NEXT: .cfi_def_cfa_offset 144 +; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: sub sp, sp, a0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: addi a1, sp, 24 +; RV64-NEXT: addi a1, sp, 128 ; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: vs8r.v v16, (a0) -; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: call callee_scalable_vector_split_indirect@plt ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 144 ; RV64-NEXT: ret %c = alloca i64 %a = call @callee_scalable_vector_split_indirect( zeroinitializer, %x) diff --git 
a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -83,7 +83,7 @@ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -272 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB - ; CHECK-NEXT: $x11 = frame-setup ADDI killed $x0, 51 + ; CHECK-NEXT: $x11 = frame-setup ADDI killed $x0, 52 ; CHECK-NEXT: $x10 = frame-setup MUL killed $x10, killed $x11 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10 ; CHECK-NEXT: $x2 = frame-setup ANDI $x2, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -559,21 +559,21 @@ define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, * %out) { ; CHECK-LABEL: insert_v2i64_nxv16i64_hi: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 80 +; CHECK-NEXT: addi a0, sp, 128 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: addi a2, sp, 64 ; CHECK-NEXT: add a2, a2, a0 ; CHECK-NEXT: vl8re64.v v8, (a2) -; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: addi a2, sp, 64 ; CHECK-NEXT: vl8re64.v v16, (a2) ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: vs8r.v v8, (a0) @@ -581,7 +581,7 @@ ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret %sv = load <2 x i64>, <2 x i64>* %psv %v = call @llvm.experimental.vector.insert.v2i64.nxv16i64( undef, <2 x i64> %sv, i64 8) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1833,8 +1833,8 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a4, a3, 3 -; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: li a4, 10 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, mu @@ -1882,8 +1882,8 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 3 -; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll --- a/llvm/test/CodeGen/RISCV/rvv/localvar.ll +++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -85,13 +85,13 @@ define void @local_var_m4() { ; RV64IV-LABEL: local_var_m4: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -32 -; RV64IV-NEXT: .cfi_def_cfa_offset 32 -; RV64IV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64IV-NEXT: addi sp, sp, -48 +; RV64IV-NEXT: .cfi_def_cfa_offset 48 +; 
RV64IV-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64IV-NEXT: .cfi_offset ra, -8 ; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 32 +; RV64IV-NEXT: addi s0, sp, 48 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 3 @@ -100,14 +100,14 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 2 ; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 16 +; RV64IV-NEXT: addi a0, a0, 32 ; RV64IV-NEXT: vl4r.v v8, (a0) -; RV64IV-NEXT: addi a0, sp, 16 +; RV64IV-NEXT: addi a0, sp, 32 ; RV64IV-NEXT: vl4r.v v8, (a0) -; RV64IV-NEXT: addi sp, s0, -32 -; RV64IV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 32 +; RV64IV-NEXT: addi sp, s0, -48 +; RV64IV-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 48 ; RV64IV-NEXT: ret %local0 = alloca %local1 = alloca @@ -119,13 +119,13 @@ define void @local_var_m8() { ; RV64IV-LABEL: local_var_m8: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -64 -; RV64IV-NEXT: .cfi_def_cfa_offset 64 -; RV64IV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64IV-NEXT: addi sp, sp, -80 +; RV64IV-NEXT: .cfi_def_cfa_offset 80 +; RV64IV-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 64(sp) # 8-byte Folded Spill ; RV64IV-NEXT: .cfi_offset ra, -8 ; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 64 +; RV64IV-NEXT: addi s0, sp, 80 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 4 @@ -134,14 +134,14 @@ ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 3 ; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: addi a0, a0, 64 ; RV64IV-NEXT: vl8r.v v8, (a0) -; RV64IV-NEXT: addi a0, sp, 48 +; RV64IV-NEXT: addi a0, sp, 64 ; RV64IV-NEXT: vl8r.v v8, (a0) -; RV64IV-NEXT: addi sp, s0, -64 -; RV64IV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 64 +; RV64IV-NEXT: addi sp, s0, -80 +; RV64IV-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 80 ; RV64IV-NEXT: ret %local0 = alloca %local1 = alloca @@ -233,15 +233,15 @@ define void @local_var_m2_with_bp(i64 %n) { ; RV64IV-LABEL: local_var_m2_with_bp: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -256 -; RV64IV-NEXT: .cfi_def_cfa_offset 256 -; RV64IV-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s1, 232(sp) # 8-byte Folded Spill +; RV64IV-NEXT: addi sp, sp, -272 +; RV64IV-NEXT: .cfi_def_cfa_offset 272 +; RV64IV-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s1, 248(sp) # 8-byte Folded Spill ; RV64IV-NEXT: .cfi_offset ra, -8 ; RV64IV-NEXT: .cfi_offset s0, -16 ; RV64IV-NEXT: .cfi_offset s1, -24 -; RV64IV-NEXT: addi s0, sp, 256 +; RV64IV-NEXT: addi s0, sp, 272 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 ; RV64IV-NEXT: csrr a1, vlenb ; RV64IV-NEXT: slli a1, a1, 2 @@ -256,22 +256,22 @@ ; RV64IV-NEXT: csrr a2, vlenb ; RV64IV-NEXT: slli a2, a2, 1 ; RV64IV-NEXT: add a2, s1, a2 -; RV64IV-NEXT: addi a2, a2, 232 +; RV64IV-NEXT: addi a2, a2, 240 ; RV64IV-NEXT: call notdead2@plt ; RV64IV-NEXT: lw a0, 124(s1) ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 
1 ; RV64IV-NEXT: add a0, s1, a0 -; RV64IV-NEXT: addi a0, a0, 232 +; RV64IV-NEXT: addi a0, a0, 240 ; RV64IV-NEXT: vl2r.v v8, (a0) -; RV64IV-NEXT: addi a0, s1, 232 +; RV64IV-NEXT: addi a0, s1, 240 ; RV64IV-NEXT: vl2r.v v8, (a0) ; RV64IV-NEXT: lw a0, 120(s1) -; RV64IV-NEXT: addi sp, s0, -256 -; RV64IV-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s1, 232(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 256 +; RV64IV-NEXT: addi sp, s0, -272 +; RV64IV-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s1, 248(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 272 ; RV64IV-NEXT: ret %1 = alloca i8, i64 %n %2 = alloca i32, align 128 diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll --- a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll @@ -27,13 +27,13 @@ define @caller() { ; RV64IV-LABEL: caller: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -64 -; RV64IV-NEXT: .cfi_def_cfa_offset 64 -; RV64IV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64IV-NEXT: addi sp, sp, -80 +; RV64IV-NEXT: .cfi_def_cfa_offset 80 +; RV64IV-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 64(sp) # 8-byte Folded Spill ; RV64IV-NEXT: .cfi_offset ra, -8 ; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 64 +; RV64IV-NEXT: addi s0, sp, 80 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 5 @@ -43,26 +43,26 @@ ; RV64IV-NEXT: li a1, 24 ; RV64IV-NEXT: mul a0, a0, a1 ; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: addi a0, a0, 64 ; RV64IV-NEXT: vl8r.v v8, (a0) ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 4 ; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: addi a0, a0, 64 ; RV64IV-NEXT: vl8r.v v16, (a0) ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 3 ; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: addi a0, a0, 64 ; RV64IV-NEXT: vl8r.v v24, (a0) -; RV64IV-NEXT: addi a0, sp, 48 -; RV64IV-NEXT: addi a1, sp, 48 +; RV64IV-NEXT: addi a0, sp, 64 +; RV64IV-NEXT: addi a1, sp, 64 ; RV64IV-NEXT: vs8r.v v24, (a1) ; RV64IV-NEXT: call callee@plt -; RV64IV-NEXT: addi sp, s0, -64 -; RV64IV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 64 +; RV64IV-NEXT: addi sp, s0, -80 +; RV64IV-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 80 ; RV64IV-NEXT: ret %local0 = alloca %local1 = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll --- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll +++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll @@ -5,17 +5,18 @@ define signext i32 @foo(i32 signext %aa) #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -80 -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -96 +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 
72(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: addi s0, sp, 80 +; CHECK-NEXT: addi s0, sp, 96 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: andi sp, sp, -8 ; CHECK-NEXT: mv s1, sp @@ -40,11 +41,11 @@ ; CHECK-NEXT: call gfunc@plt ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: addi sp, s0, -80 -; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 80 +; CHECK-NEXT: addi sp, s0, -96 +; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret entry: %aa.addr = alloca i32, align 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -45,20 +45,20 @@ ; ; SPILL-O2-LABEL: foo: ; SPILL-O2: # %bb.0: -; SPILL-O2-NEXT: addi sp, sp, -16 -; SPILL-O2-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; SPILL-O2-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -32 +; SPILL-O2-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; SPILL-O2-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: slli a1, a1, 1 ; SPILL-O2-NEXT: sub sp, sp, a1 ; SPILL-O2-NEXT: mv s0, a0 -; SPILL-O2-NEXT: addi a1, sp, 8 +; SPILL-O2-NEXT: addi a1, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; SPILL-O2-NEXT: vfadd.vv v9, v8, v9 ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: add a0, sp, a0 -; SPILL-O2-NEXT: addi a0, a0, 8 +; SPILL-O2-NEXT: addi a0, a0, 16 ; SPILL-O2-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: lui a0, %hi(.L.str) ; SPILL-O2-NEXT: addi a0, a0, %lo(.L.str) @@ -66,17 +66,17 @@ ; SPILL-O2-NEXT: vsetvli zero, s0, e64, m1, ta, mu ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: add a0, sp, a0 -; SPILL-O2-NEXT: addi a0, a0, 8 +; SPILL-O2-NEXT: addi a0, a0, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; SPILL-O2-NEXT: addi a0, sp, 8 +; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: vfadd.vv v8, v9, v8 ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 -; SPILL-O2-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; SPILL-O2-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; SPILL-O2-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; SPILL-O2-NEXT: addi sp, sp, 32 ; SPILL-O2-NEXT: ret { %x = call @llvm.riscv.vfadd.nxv1f64.nxv1f64( undef, %a, %b, i32 %gvl) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll @@ -9,6 +9,7 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size 
Folded Spill @@ -17,6 +18,7 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -25,6 +27,7 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -33,6 +36,7 @@ ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -48,6 +52,7 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -56,6 +61,7 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -64,6 +70,7 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -72,6 +79,7 @@ ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -9,6 +9,7 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -20,6 +21,7 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -63,6 +65,7 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -74,6 +77,7 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -9,28 +9,28 @@ define @foo( %a, %b, %c, i64 %gvl) nounwind ; SPILL-O0-LABEL: foo: ; SPILL-O0: # %bb.0: -; SPILL-O0-NEXT: addi sp, sp, 
-32 -; SPILL-O0-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -48 +; SPILL-O0-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: slli a1, a1, 1 ; SPILL-O0-NEXT: sub sp, sp, a1 ; SPILL-O0-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: addi a1, a1, 24 +; SPILL-O0-NEXT: addi a1, a1, 32 ; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; SPILL-O0-NEXT: vfadd.vv v8, v8, v9 -; SPILL-O0-NEXT: addi a0, sp, 24 +; SPILL-O0-NEXT: addi a0, sp, 32 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: lui a0, %hi(.L.str) ; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) ; SPILL-O0-NEXT: call puts@plt -; SPILL-O0-NEXT: addi a1, sp, 24 +; SPILL-O0-NEXT: addi a1, sp, 32 ; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: addi a1, a1, 24 +; SPILL-O0-NEXT: addi a1, a1, 32 ; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: # kill: def $x11 killed $x10 ; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload @@ -39,8 +39,8 @@ ; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 -; SPILL-O0-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; SPILL-O0-NEXT: addi sp, sp, 32 +; SPILL-O0-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; SPILL-O0-NEXT: addi sp, sp, 48 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: foo: diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll @@ -9,6 +9,7 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -17,6 +18,7 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -25,6 +27,7 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -33,6 +36,7 @@ ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -9,6 +9,7 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -20,6 +21,7 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; 
SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -63,6 +65,7 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) @@ -74,6 +77,7 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll --- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll @@ -22,34 +22,34 @@ define @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %x) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -48 -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 4 ; CHECK-NEXT: sub sp, sp, t0 -; CHECK-NEXT: addi t0, sp, 40 +; CHECK-NEXT: addi t0, sp, 64 ; CHECK-NEXT: sd t0, 8(sp) ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 3 ; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 40 +; CHECK-NEXT: addi t0, t0, 64 ; CHECK-NEXT: sd t0, 0(sp) -; CHECK-NEXT: addi t0, sp, 40 +; CHECK-NEXT: addi t0, sp, 64 ; CHECK-NEXT: vs8r.v v8, (t0) ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 3 ; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 40 +; CHECK-NEXT: addi t0, t0, 64 ; CHECK-NEXT: vs8r.v v8, (t0) ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: call bar@plt ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret %ret = call @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %x, %x, %x, %x) ret %ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll --- a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll @@ -9,8 +9,7 @@ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 32 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 1 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, 15 @@ -22,8 +21,7 @@ ; CHECK-NEXT: addi a2, a2, -32 ; CHECK-NEXT: vl1re64.v v8, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 1 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: sub a2, s0, a2 ; CHECK-NEXT: addi a2, a2, -32 ; CHECK-NEXT: vl2re64.v v8, (a2) @@ -56,12 +54,12 @@ ; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 128 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 1 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; 
CHECK-NEXT: addi a0, a0, 112 ; CHECK-NEXT: vl1re64.v v8, (a0) @@ -88,14 +86,13 @@ define void @rvv_vla_and_overaligned(i64 %n, i64 %i) nounwind { ; CHECK-LABEL: rvv_vla_and_overaligned: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -128 -; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 104(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 128 +; CHECK-NEXT: addi sp, sp, -144 +; CHECK-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 144 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 1 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: mv s1, sp @@ -105,21 +102,22 @@ ; CHECK-NEXT: sub a0, sp, a0 ; CHECK-NEXT: mv sp, a0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: add a2, s1, a2 -; CHECK-NEXT: addi a2, a2, 104 +; CHECK-NEXT: addi a2, a2, 112 ; CHECK-NEXT: vl1re64.v v8, (a2) -; CHECK-NEXT: addi a2, s1, 104 +; CHECK-NEXT: addi a2, s1, 112 ; CHECK-NEXT: vl2re64.v v8, (a2) ; CHECK-NEXT: lw a2, 64(s1) ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: lw a0, 0(a0) -; CHECK-NEXT: addi sp, s0, -128 -; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 104(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 128 +; CHECK-NEXT: addi sp, s0, -144 +; CHECK-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 144 ; CHECK-NEXT: ret %overaligned = alloca i32, align 64 %vla.addr = alloca i32, i64 %n diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir --- a/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-stack-align.mir @@ -34,20 +34,20 @@ ; ; RV64-LABEL: rvv_stack_align8: ; RV64: # %bb.0: - ; RV64-NEXT: addi sp, sp, -32 - ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill + ; RV64-NEXT: addi sp, sp, -48 + ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: sub sp, sp, a0 - ; RV64-NEXT: addi a0, sp, 24 + ; RV64-NEXT: addi a0, sp, 32 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: addi a2, sp, 8 ; RV64-NEXT: call extern@plt ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: add sp, sp, a0 - ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload - ; RV64-NEXT: addi sp, sp, 32 + ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload + ; RV64-NEXT: addi sp, sp, 48 ; RV64-NEXT: ret %a = alloca , align 8 %b = alloca i64 @@ -56,8 +56,6 @@ ret void } - ; FIXME: The alloca is not correctly aligned to 16 bytes. 
-
 define void @rvv_stack_align16() #0 {
 ; RV32-LABEL: rvv_stack_align16:
 ; RV32: # %bb.0:
@@ -79,20 +77,20 @@
 ;
 ; RV64-LABEL: rvv_stack_align16:
 ; RV64: # %bb.0:
- ; RV64-NEXT: addi sp, sp, -32
- ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: addi sp, sp, -48
+ ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 1
 ; RV64-NEXT: sub sp, sp, a0
- ; RV64-NEXT: addi a0, sp, 24
+ ; RV64-NEXT: addi a0, sp, 32
 ; RV64-NEXT: addi a1, sp, 16
 ; RV64-NEXT: addi a2, sp, 8
 ; RV64-NEXT: call extern@plt
 ; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 1
 ; RV64-NEXT: add sp, sp, a0
- ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
- ; RV64-NEXT: addi sp, sp, 32
+ ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: addi sp, sp, 48
 ; RV64-NEXT: ret
 %a = alloca , align 16
 %b = alloca i64
@@ -101,47 +99,45 @@
 ret void
 }
- ; FIXME: The alloca is not correctly aligned to 32 bytes.
-
 define void @rvv_stack_align32() #0 {
 ; RV32-LABEL: rvv_stack_align32:
 ; RV32: # %bb.0:
- ; RV32-NEXT: addi sp, sp, -32
- ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
- ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
- ; RV32-NEXT: addi s0, sp, 32
+ ; RV32-NEXT: addi sp, sp, -48
+ ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+ ; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+ ; RV32-NEXT: addi s0, sp, 48
 ; RV32-NEXT: csrr a0, vlenb
- ; RV32-NEXT: slli a0, a0, 1
+ ; RV32-NEXT: slli a0, a0, 2
 ; RV32-NEXT: sub sp, sp, a0
 ; RV32-NEXT: andi sp, sp, -32
- ; RV32-NEXT: addi a0, sp, 24
+ ; RV32-NEXT: addi a0, sp, 32
 ; RV32-NEXT: addi a1, sp, 16
 ; RV32-NEXT: addi a2, sp, 8
 ; RV32-NEXT: call extern@plt
- ; RV32-NEXT: addi sp, s0, -32
- ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
- ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
- ; RV32-NEXT: addi sp, sp, 32
+ ; RV32-NEXT: addi sp, s0, -48
+ ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+ ; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+ ; RV32-NEXT: addi sp, sp, 48
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: rvv_stack_align32:
 ; RV64: # %bb.0:
- ; RV64-NEXT: addi sp, sp, -32
- ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
- ; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
- ; RV64-NEXT: addi s0, sp, 32
+ ; RV64-NEXT: addi sp, sp, -48
+ ; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+ ; RV64-NEXT: addi s0, sp, 48
 ; RV64-NEXT: csrr a0, vlenb
- ; RV64-NEXT: slli a0, a0, 1
+ ; RV64-NEXT: slli a0, a0, 2
 ; RV64-NEXT: sub sp, sp, a0
 ; RV64-NEXT: andi sp, sp, -32
- ; RV64-NEXT: addi a0, sp, 16
+ ; RV64-NEXT: addi a0, sp, 32
 ; RV64-NEXT: addi a1, sp, 8
 ; RV64-NEXT: mv a2, sp
 ; RV64-NEXT: call extern@plt
- ; RV64-NEXT: addi sp, s0, -32
- ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
- ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
- ; RV64-NEXT: addi sp, sp, 32
+ ; RV64-NEXT: addi sp, s0, -48
+ ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+ ; RV64-NEXT: addi sp, sp, 48
 ; RV64-NEXT: ret
 %a = alloca , align 32
 %b = alloca i64
diff --git a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll
--- a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll
@@ -8,11 +8,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefix=RV64
-; FIXME: The stack is assumed and required to be aligned to 16 bytes, but we
-; only ensure an 8-byte alignment for the size of the section containing RVV
-; objects. After establishing sp, on zve64x the stack is only 8-byte aligned.
-; This is wrong in and of itself, but we can see that this also has the effect
-; that the 16-byte-aligned object at the bottom of the stack is misaligned.
+; FIXME: We are over-aligning the stack on V, wasting stack space.
 define i64* @scalar_stack_align16() nounwind {
 ; RV32-LABEL: scalar_stack_align16:
 ; RV32: # %bb.0:
@@ -20,11 +16,13 @@
 ; RV32-NEXT: addi sp, sp, -32
 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
 ; RV32-NEXT: sub sp, sp, a0
 ; RV32-NEXT: addi a0, sp, 16
 ; RV32-NEXT: call extern@plt
 ; RV32-NEXT: mv a0, sp
 ; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
 ; RV32-NEXT: add sp, sp, a1
 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT: addi sp, sp, 32
@@ -32,17 +30,19 @@
 ;
 ; RV64-LABEL: scalar_stack_align16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
 ; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
 ; RV64-NEXT: sub sp, sp, a0
-; RV64-NEXT: addi a0, sp, 8
+; RV64-NEXT: addi a0, sp, 16
 ; RV64-NEXT: call extern@plt
 ; RV64-NEXT: mv a0, sp
 ; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
 ; RV64-NEXT: add sp, sp, a1
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 32
 ; RV64-NEXT: ret
 %a = alloca
 %c = alloca i64, align 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir
--- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-offset-for-rvv-object.mir
@@ -4,17 +4,19 @@
 # Stack layout of this program
 # |--------------------------| -- <-- Incoming SP
 # | a7 (Vaarg)               |
-# | ------------------------ | -- <-- New SP + vlenb + 56
+# | ------------------------ | -- <-- New SP + vlenb + 72
 # | a6 (Vaarg)               |
-# | ------------------------ | -- <-- New SP + vlenb + 48
+# | ------------------------ | -- <-- New SP + vlenb + 64
 # | ra (Callee-saved reg)    |
-# | ------------------------ | -- <-- New SP + vlenb + 40
+# | ------------------------ | -- <-- New SP + vlenb + 56
 # | s0 (Callee-saved reg)    |
-# | ------------------------ | -- <-- New SP + vlenb + 32
+# | ------------------------ | -- <-- New SP + vlenb + 48
 # | s1 (Callee-saved reg)    |
-# | ------------------------ | -- <-- New SP + vlenb + 24
+# | ------------------------ | -- <-- New SP + vlenb + 40
+# | 8 bytes of padding       |
+# | ------------------------ | -- <-- New SP + vlenb
 # | v8 (RVV objects)         |
-# | ------------------------ | -- <-- New SP + 24
+# | ------------------------ | -- <-- New SP + 32
 # | buf1                     |
 # |--------------------------| -- <-- New SP + 16
 # | Stack ID 5               |
@@ -122,7 +124,7 @@
 ; CHECK-NEXT: - { id: 0, name: buf1, type: default, offset: -48, size: 1, alignment: 8,
 ; CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
 ; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- ; CHECK-NEXT: - { id: 1, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8,
+ ; CHECK-NEXT: - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8,
 ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true,
 ; CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
 ; CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
@@ -144,27 +146,30 @@
 ; CHECK-NEXT: successors: %bb.1(0x80000000)
 ; CHECK-NEXT: liveins: $x11, $x14, $x16, $x17, $x1, $x8, $x9
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -64
- ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 64
- ; CHECK-NEXT: SD killed $x1, $x2, 40 :: (store (s64) into %stack.2)
- ; CHECK-NEXT: SD killed $x8, $x2, 32 :: (store (s64) into %stack.3)
- ; CHECK-NEXT: SD killed $x9, $x2, 24 :: (store (s64) into %stack.4)
+ ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -80
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 80
+ ; CHECK-NEXT: SD killed $x1, $x2, 56 :: (store (s64) into %stack.2)
+ ; CHECK-NEXT: SD killed $x8, $x2, 48 :: (store (s64) into %stack.3)
+ ; CHECK-NEXT: SD killed $x9, $x2, 40 :: (store (s64) into %stack.4)
 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -24
 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x8, -32
 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x9, -40
 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB
+ ; CHECK-NEXT: $x10 = frame-setup SLLI killed $x10, 1
 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10
 ; CHECK-NEXT: renamable $x8 = COPY $x14
 ; CHECK-NEXT: renamable $x9 = COPY $x11
 ; CHECK-NEXT: $x10 = PseudoReadVLENB
+ ; CHECK-NEXT: $x10 = SLLI killed $x10, 1
 ; CHECK-NEXT: $x10 = ADD $x2, killed $x10
- ; CHECK-NEXT: SD killed renamable $x17, killed $x10, 56 :: (store (s64))
+ ; CHECK-NEXT: SD killed renamable $x17, killed $x10, 72 :: (store (s64))
 ; CHECK-NEXT: $x10 = PseudoReadVLENB
+ ; CHECK-NEXT: $x10 = SLLI killed $x10, 1
 ; CHECK-NEXT: $x10 = ADD $x2, killed $x10
- ; CHECK-NEXT: SD killed renamable $x16, killed $x10, 48 :: (store (s64) into %fixed-stack.1, align 16)
+ ; CHECK-NEXT: SD killed renamable $x16, killed $x10, 64 :: (store (s64) into %fixed-stack.1, align 16)
 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype
 ; CHECK-NEXT: renamable $v8 = PseudoVMV_V_I_MF8 0, 2, 3 /* e8 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $x10 = ADDI $x2, 24
+ ; CHECK-NEXT: $x10 = ADDI $x2, 32
 ; CHECK-NEXT: PseudoVSPILL_M1 killed renamable $v8, killed $x10 :: (store unknown-size into %stack.1, align 8)
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1.while.cond:
@@ -179,7 +184,7 @@
 ; CHECK-NEXT: liveins: $x8, $x9
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 69 /* e8, mf8, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $x10 = ADDI $x2, 24
+ ; CHECK-NEXT: $x10 = ADDI $x2, 32
 ; CHECK-NEXT: renamable $v8 = PseudoVRELOAD_M1 killed $x10 :: (load unknown-size from %stack.1, align 8)
 ; CHECK-NEXT: PseudoVSE8_V_MF8 killed renamable $v8, renamable $x8, 2, 3 /* e8 */, implicit $vl, implicit $vtype :: (store (s16) into %ir.0, align 1)
 ; CHECK-NEXT: $x10 = COPY renamable $x9
diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir
--- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir
@@ -40,7 +40,7 @@
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: andi sp, sp, -32
 ; CHECK-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
- ; CHECK-NEXT: addi a0, sp, 56
+ ; CHECK-NEXT: addi a0, sp, 64
 ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: addi sp, s0, -80
 ; CHECK-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir
--- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir
@@ -9,19 +9,19 @@
 define void @foo() #0 {
 ; CHECK-LABEL: foo:
 ; CHECK: # %bb.0: # %entry
- ; CHECK-NEXT: addi sp, sp, -32
- ; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: addi sp, sp, -48
+ ; CHECK-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 1
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
- ; CHECK-NEXT: addi a0, sp, 24
+ ; CHECK-NEXT: addi a0, sp, 32
 ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 1
 ; CHECK-NEXT: add sp, sp, a0
- ; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
- ; CHECK-NEXT: addi sp, sp, 32
+ ; CHECK-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: addi sp, sp, 48
 ; CHECK-NEXT: ret
 entry:
 ret void