Index: llvm/lib/Target/RISCV/RISCVFrameLowering.h =================================================================== --- llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -68,9 +68,6 @@ bool isSupportedStackID(TargetStackID::Value ID) const override; TargetStackID::Value getStackIDForScalableVectors() const override; - void processFunctionBeforeFrameIndicesReplaced( - MachineFunction &MF, RegScavenger *RS = nullptr) const override; - protected: const RISCVSubtarget &STI; @@ -81,7 +78,7 @@ int64_t Val, MachineInstr::MIFlag Flag) const; void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - int64_t Amount) const; + int64_t Amount, int64_t Padding) const; int64_t assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const; }; } Index: llvm/lib/Target/RISCV/RISCVFrameLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -307,7 +307,8 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - int64_t Amount) const { + int64_t Amount, + int64_t Padding) const { assert(Amount != 0 && "Did not need to adjust stack pointer for RVV."); const RISCVInstrInfo *TII = STI.getInstrInfo(); @@ -316,6 +317,7 @@ if (Amount < 0) { Amount = -Amount; Opc = RISCV::SUB; + Padding = -Padding; } // 1. Multiply the number of v-slots to the length of registers @@ -323,7 +325,14 @@ // 2. SP = SP - RVV stack size BuildMI(MBB, MBBI, DL, TII->get(Opc), SPReg) .addReg(SPReg) - .addReg(FactorRegister); + .addReg(FactorRegister, RegState::Kill); + // 3. Make sure we get enough padding for the alignment required by the + // RVV objects. + if (Padding) { + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), SPReg) + .addReg(SPReg) + .addImm(Padding); + } } void RISCVFrameLowering::emitPrologue(MachineFunction &MF, @@ -480,8 +489,15 @@ } } - if (RVVStackSize) - adjustStackForRVV(MF, MBB, MBBI, DL, -RVVStackSize); + if (RVVStackSize) { + // Padding so RVV objects are aligned to 8 and the stack remains aligned. + assert(getStackAlign() >= 8 && "Stack alignment is too small for RVV"); + int64_t RVVPadding = + !hasFP(MF) && (RVFI->getCalleeSavedStackSize() % 8 != 0) + ? getStackAlign().value() + : 0; + adjustStackForRVV(MF, MBB, MBBI, DL, -RVVStackSize, RVVPadding); + } if (hasFP(MF)) { // Realign Stack @@ -574,7 +590,16 @@ MachineInstr::FrameDestroy); } else { if (RVVStackSize) - adjustStackForRVV(MF, MBB, LastFrameDestroy, DL, RVVStackSize); + { + // Padding so RVV objects are aligned to 8 and the stack remains aligned. + assert(getStackAlign() >= 8 && "Stack alignment is too small for RVV"); + int64_t RVVPadding = + !hasFP(MF) && (RVFI->getCalleeSavedStackSize() % 8 != 0) + ? getStackAlign().value() + : 0; + adjustStackForRVV(MF, MBB, LastFrameDestroy, DL, RVVStackSize, + RVVPadding); + } } uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); @@ -676,9 +701,9 @@ if (FI < 0) Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - Offset += - StackOffset::get(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), - RVFI->getRVVStackSize()); + Offset += StackOffset::get( + alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8), + RVFI->getRVVStackSize()); } } else { FrameReg = RI->getFrameRegister(MF); @@ -715,9 +740,9 @@ if (FI < 0) Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - Offset += StackOffset::get(MFI.getStackSize() - - RVFI->getCalleeSavedStackSize(), - RVFI->getRVVStackSize()); + Offset += StackOffset::get( + alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8), + RVFI->getRVVStackSize()); } } } @@ -822,31 +847,20 @@ RegInfo->getSpillAlign(*RC), false); RS->addScavengingFrameIndex(RegScavFI); } -} -void RISCVFrameLowering::processFunctionBeforeFrameIndicesReplaced( - MachineFunction &MF, RegScavenger *RS) const { - auto *RVFI = MF.getInfo(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) { RVFI->setCalleeSavedStackSize(0); return; } - int64_t MinOffset = std::numeric_limits::max(); - int64_t MaxOffset = std::numeric_limits::min(); + unsigned Size = 0; for (const auto &Info : MFI.getCalleeSavedInfo()) { int FrameIdx = Info.getFrameIdx(); if (MFI.getStackID(FrameIdx) != TargetStackID::Default) continue; - int64_t Offset = MFI.getObjectOffset(FrameIdx); - int64_t ObjSize = MFI.getObjectSize(FrameIdx); - MinOffset = std::min(Offset, MinOffset); - MaxOffset = std::max(Offset + ObjSize, MaxOffset); + Size += MFI.getObjectSize(FrameIdx); } - - unsigned Size = alignTo(MaxOffset - MinOffset, 16); RVFI->setCalleeSavedStackSize(Size); } Index: llvm/test/CodeGen/RISCV/rvv/localvar.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/localvar.ll +++ llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -256,15 +256,15 @@ ; RV64IV-NEXT: csrr a2, vlenb ; RV64IV-NEXT: slli a2, a2, 1 ; RV64IV-NEXT: add a2, s1, a2 -; RV64IV-NEXT: addi a2, a2, 224 +; RV64IV-NEXT: addi a2, a2, 232 ; RV64IV-NEXT: call notdead2@plt ; RV64IV-NEXT: lw a0, 124(s1) ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add a0, s1, a0 -; RV64IV-NEXT: addi a0, a0, 224 +; RV64IV-NEXT: addi a0, a0, 232 ; RV64IV-NEXT: vl2r.v v26, (a0) -; RV64IV-NEXT: addi a0, s1, 224 +; RV64IV-NEXT: addi a0, s1, 232 ; RV64IV-NEXT: vl2r.v v26, (a0) ; RV64IV-NEXT: lw a0, 120(s1) ; RV64IV-NEXT: addi sp, s0, -256 Index: llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -14,27 +14,33 @@ ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: slli a1, a1, 1 ; SPILL-O0-NEXT: sub sp, sp, a1 +; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O0-NEXT: vfadd.vv v25, v8, v9 -; SPILL-O0-NEXT: vs1r.v v25, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: lui a0, %hi(.L.str) ; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) ; SPILL-O0-NEXT: call puts@plt -; SPILL-O0-NEXT: vl1r.v v25, (sp) # Unknown-size Folded Reload +; SPILL-O0-NEXT: addi a1, sp, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: # kill: def $x11 killed $x10 -; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload ; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O0-NEXT: vfadd.vv v8, v8, v25 ; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -43,16 +49,18 @@ ; SPILL-O2: # %bb.0: ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; SPILL-O2-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; SPILL-O2-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: slli a1, a1, 1 ; SPILL-O2-NEXT: sub sp, sp, a1 ; SPILL-O2-NEXT: mv s0, a0 -; SPILL-O2-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi a1, sp, 8 +; SPILL-O2-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O2-NEXT: vfadd.vv v25, v8, v9 ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 8 ; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: lui a0, %hi(.L.str) ; SPILL-O2-NEXT: addi a0, a0, %lo(.L.str) @@ -60,8 +68,10 @@ ; SPILL-O2-NEXT: vsetvli a0, s0, e64,m1,ta,mu ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 8 ; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload -; SPILL-O2-NEXT: vl1r.v v26, (sp) # Unknown-size Folded Reload +; SPILL-O2-NEXT: addi a0, sp, 8 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: vfadd.vv v8, v26, v25 ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 @@ -78,4 +88,4 @@ } declare @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i32 %gvl) -declare i32 @puts(i8*); \ No newline at end of file +declare i32 @puts(i8*); Index: llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -17,22 +17,22 @@ ; SPILL-O0-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: addi a1, a1, 24 ; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O0-NEXT: vfadd.vv v25, v8, v9 -; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: addi a0, sp, 24 ; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: lui a0, %hi(.L.str) ; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) ; SPILL-O0-NEXT: call puts@plt -; SPILL-O0-NEXT: addi a1, sp, 16 +; SPILL-O0-NEXT: addi a1, sp, 24 ; SPILL-O0-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: addi a1, a1, 24 ; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; SPILL-O0-NEXT: # kill: def $x11 killed $x10 +; SPILL-O0-NEXT: # kill: def $x11 killed $x10 ; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O0-NEXT: vfadd.vv v8, v8, v25 Index: llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll +++ llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll @@ -107,9 +107,9 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: add a2, s1, a2 -; CHECK-NEXT: addi a2, a2, 96 +; CHECK-NEXT: addi a2, a2, 104 ; CHECK-NEXT: vl1re64.v v25, (a2) -; CHECK-NEXT: addi a2, s1, 96 +; CHECK-NEXT: addi a2, s1, 104 ; CHECK-NEXT: vl2re64.v v26, (a2) ; CHECK-NEXT: lw a2, 64(s1) ; CHECK-NEXT: slli a1, a1, 2 Index: llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir +++ llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir @@ -10,11 +10,14 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 + ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: sw a0, 8(sp) # 4-byte Folded Spill - ; CHECK-NEXT: vs2r.v v30, (sp) # Unknown-size Folded Spill + ; CHECK-NEXT: addi a0, sp, 16 + ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 + ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: lw s9, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir +++ llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir @@ -11,7 +11,7 @@ ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: sd a0, 16(sp) # 8-byte Folded Spill - ; CHECK-NEXT: addi a0, sp, 16 + ; CHECK-NEXT: addi a0, sp, 24 ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1