Index: llvm/lib/Target/RISCV/RISCVFrameLowering.h =================================================================== --- llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -68,9 +68,6 @@ bool isSupportedStackID(TargetStackID::Value ID) const override; TargetStackID::Value getStackIDForScalableVectors() const override; - void processFunctionBeforeFrameIndicesReplaced( - MachineFunction &MF, RegScavenger *RS = nullptr) const override; - protected: const RISCVSubtarget &STI; Index: llvm/lib/Target/RISCV/RISCVFrameLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -323,7 +323,7 @@ // 2. SP = SP - RVV stack size BuildMI(MBB, MBBI, DL, TII->get(Opc), SPReg) .addReg(SPReg) - .addReg(FactorRegister); + .addReg(FactorRegister, RegState::Kill); } void RISCVFrameLowering::emitPrologue(MachineFunction &MF, @@ -385,7 +385,7 @@ // FIXME (note copied from Lanai): This appears to be overallocating. Needs // investigation. Get the number of bytes to allocate from the FrameInfo. 
- uint64_t StackSize = MFI.getStackSize(); + uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding(); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); @@ -560,7 +560,7 @@ if (!CSI.empty()) LastFrameDestroy = std::prev(MBBI, CSI.size()); - uint64_t StackSize = MFI.getStackSize(); + uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding(); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize(); uint64_t RVVStackSize = RVFI->getRVVStackSize(); @@ -637,7 +637,8 @@ if (FirstSPAdjustAmount) Offset += StackOffset::getFixed(FirstSPAdjustAmount); else - Offset += StackOffset::getFixed(MFI.getStackSize()); + Offset += + StackOffset::getFixed(MFI.getStackSize() + RVFI->getRVVPadding()); } else if (RI->needsStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { // If the stack was realigned, the frame pointer is set in order to allow // SP to be restored, so we need another base register to record the stack @@ -645,40 +646,63 @@ if (hasBP(MF)) { FrameReg = RISCVABI::getBPReg(); // |--------------------------| -- <-- FP - // | callee-saved registers | | <---------. - // |--------------------------| -- | - // | realignment (the size of | | | - // | this area is not counted | | | - // | in MFI.getStackSize()) | | | - // |--------------------------| -- |-- MFI.getStackSize() - // | RVV objects | | | - // |--------------------------| -- | - // | scalar local variables | | <---------' + // | callee-saved registers | | <----. 
+ // |--------------------------| -- | + // | realignment (the size of | | | + // | this area is not counted | | | + // | in MFI.getStackSize()) | | | + // |--------------------------| -- | + // | Padding after RVV | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- |-- MFI.getStackSize() + // | RVV objects | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | Padding before RVV | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | scalar local variables | | <----' // |--------------------------| -- <-- BP // | VarSize objects | | // |--------------------------| -- <-- SP } else { FrameReg = RISCV::X2; // |--------------------------| -- <-- FP - // | callee-saved registers | | <---------. - // |--------------------------| -- | - // | realignment (the size of | | | - // | this area is not counted | | | - // | in MFI.getStackSize()) | | | - // |--------------------------| -- |-- MFI.getStackSize() - // | RVV objects | | | - // |--------------------------| -- | - // | scalar local variables | | <---------' + // | callee-saved registers | | <----. 
+ // |--------------------------| -- | + // | realignment (the size of | | | + // | this area is not counted | | | + // | in MFI.getStackSize()) | | | + // |--------------------------| -- | + // | Padding after RVV | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- |-- MFI.getStackSize() + // | RVV objects | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | Padding before RVV | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | scalar local variables | | <----' // |--------------------------| -- <-- SP } + // The total amount of padding surrounding RVV objects is described by + // RVFI->getRVVPadding() and it can be zero. It allows us to align the RVV + // objects to 8 bytes. if (MFI.getStackID(FI) == TargetStackID::Default) { Offset += StackOffset::getFixed(MFI.getStackSize()); if (FI < 0) Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - Offset += - StackOffset::get(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), - RVFI->getRVVStackSize()); + Offset += StackOffset::get( + alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8), + RVFI->getRVVStackSize()); } } else { FrameReg = RI->getFrameRegister(MF); @@ -704,20 +728,34 @@ // When using SP to access frame objects, we need to add RVV stack size. // // |--------------------------| -- <-- FP - // | callee-saved registers | |<--------. - // |--------------------------| -- | - // | RVV objects | | |-- MFI.getStackSize() - // |--------------------------| -- | - // | scalar local variables | |<--------' + // | callee-saved registers | | <----. 
+ // |--------------------------| -- | + // | Padding after RVV | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | RVV objects | | |-- MFI.getStackSize() + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | Padding before RVV | | | + // | (not counted in | | | + // | MFI.getStackSize()) | | | + // |--------------------------| -- | + // | scalar local variables | | <----' // |--------------------------| -- <-- SP + // + // The total amount of padding surrounding RVV objects is described by + // RVFI->getRVVPadding() and it can be zero. It allows us to align the RVV + // objects to 8 bytes. if (MFI.getStackID(FI) == TargetStackID::Default) { Offset += StackOffset::getFixed(MFI.getStackSize()); if (FI < 0) Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { - Offset += StackOffset::get(MFI.getStackSize() - - RVFI->getCalleeSavedStackSize(), - RVFI->getRVVStackSize()); + Offset += StackOffset::get( + alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8), + RVFI->getRVVStackSize()); } } } @@ -822,32 +860,29 @@ RegInfo->getSpillAlign(*RC), false); RS->addScavengingFrameIndex(RegScavFI); } -} -void RISCVFrameLowering::processFunctionBeforeFrameIndicesReplaced( - MachineFunction &MF, RegScavenger *RS) const { - auto *RVFI = MF.getInfo(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) { RVFI->setCalleeSavedStackSize(0); return; } - int64_t MinOffset = std::numeric_limits::max(); - int64_t MaxOffset = std::numeric_limits::min(); + unsigned Size = 0; for (const auto &Info : MFI.getCalleeSavedInfo()) { int FrameIdx = Info.getFrameIdx(); if (MFI.getStackID(FrameIdx) != TargetStackID::Default) continue; - int64_t Offset = MFI.getObjectOffset(FrameIdx); - int64_t ObjSize = MFI.getObjectSize(FrameIdx); - 
MinOffset = std::min(Offset, MinOffset); - MaxOffset = std::max(Offset + ObjSize, MaxOffset); + Size += MFI.getObjectSize(FrameIdx); } - - unsigned Size = alignTo(MaxOffset - MinOffset, 16); RVFI->setCalleeSavedStackSize(Size); + + // Padding required to keep the RVV stack aligned to 8 bytes + // within the main stack. We only need this when not using FP. + if (RVVStackSize && !hasFP(MF) && Size % 8 != 0) { + // Because we add the padding to the size of the stack, adding + // getStackAlign() will keep it aligned. + RVFI->setRVVPadding(getStackAlign().value()); + } } // Not preserve stack space within prologue for outgoing variables when the Index: llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -34,6 +34,8 @@ unsigned LibCallStackSize = 0; /// Size of RVV stack. uint64_t RVVStackSize = 0; + /// Padding required to keep RVV stack aligned within the main stack. 
+ uint64_t RVVPadding = 0; /// Size of stack frame to save callee saved registers unsigned CalleeSavedStackSize = 0; @@ -66,6 +68,9 @@ uint64_t getRVVStackSize() const { return RVVStackSize; } void setRVVStackSize(uint64_t Size) { RVVStackSize = Size; } + uint64_t getRVVPadding() const { return RVVPadding; } + void setRVVPadding(uint64_t Padding) { RVVPadding = Padding; } + unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } }; Index: llvm/test/CodeGen/RISCV/rvv/localvar.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/localvar.ll +++ llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -256,15 +256,15 @@ ; RV64IV-NEXT: csrr a2, vlenb ; RV64IV-NEXT: slli a2, a2, 1 ; RV64IV-NEXT: add a2, s1, a2 -; RV64IV-NEXT: addi a2, a2, 224 +; RV64IV-NEXT: addi a2, a2, 232 ; RV64IV-NEXT: call notdead2@plt ; RV64IV-NEXT: lw a0, 124(s1) ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add a0, s1, a0 -; RV64IV-NEXT: addi a0, a0, 224 +; RV64IV-NEXT: addi a0, a0, 232 ; RV64IV-NEXT: vl2r.v v26, (a0) -; RV64IV-NEXT: addi a0, s1, 224 +; RV64IV-NEXT: addi a0, s1, 232 ; RV64IV-NEXT: vl2r.v v26, (a0) ; RV64IV-NEXT: lw a0, 120(s1) ; RV64IV-NEXT: addi sp, s0, -256 Index: llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -9,50 +9,56 @@ define @foo( %a, %b, %c, i32 %gvl) nounwind ; SPILL-O0-LABEL: foo: ; SPILL-O0: # %bb.0: -; SPILL-O0-NEXT: addi sp, sp, -16 -; SPILL-O0-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -32 +; SPILL-O0-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: slli a1, a1, 1 ; SPILL-O0-NEXT: sub sp, sp, a1 ; SPILL-O0-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; 
SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O0-NEXT: vfadd.vv v25, v8, v9 -; SPILL-O0-NEXT: vs1r.v v25, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: lui a0, %hi(.L.str) ; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) ; SPILL-O0-NEXT: call puts@plt -; SPILL-O0-NEXT: vl1r.v v25, (sp) # Unknown-size Folded Reload +; SPILL-O0-NEXT: addi a1, sp, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: # kill: def $x11 killed $x10 -; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload ; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O0-NEXT: vfadd.vv v8, v8, v25 ; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 -; SPILL-O0-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; SPILL-O0-NEXT: addi sp, sp, 32 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: foo: ; SPILL-O2: # %bb.0: ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; SPILL-O2-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; SPILL-O2-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: slli a1, a1, 1 ; SPILL-O2-NEXT: sub sp, sp, a1 ; SPILL-O2-NEXT: mv s0, a0 -; SPILL-O2-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi a1, sp, 8 +; SPILL-O2-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O2-NEXT: vfadd.vv v25, v8, v9 ; SPILL-O2-NEXT: csrr a0, 
vlenb ; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 8 ; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: lui a0, %hi(.L.str) ; SPILL-O2-NEXT: addi a0, a0, %lo(.L.str) @@ -60,8 +66,10 @@ ; SPILL-O2-NEXT: vsetvli a0, s0, e64,m1,ta,mu ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 8 ; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload -; SPILL-O2-NEXT: vl1r.v v26, (sp) # Unknown-size Folded Reload +; SPILL-O2-NEXT: addi a0, sp, 8 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: vfadd.vv v8, v26, v25 ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 @@ -78,4 +86,4 @@ } declare @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i32 %gvl) -declare i32 @puts(i8*); \ No newline at end of file +declare i32 @puts(i8*); Index: llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -17,22 +17,22 @@ ; SPILL-O0-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: addi a1, a1, 24 ; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O0-NEXT: vfadd.vv v25, v8, v9 -; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: addi a0, sp, 24 ; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: lui a0, %hi(.L.str) ; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) ; SPILL-O0-NEXT: call puts@plt -; SPILL-O0-NEXT: addi a1, sp, 16 +; SPILL-O0-NEXT: addi a1, sp, 24 ; SPILL-O0-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: addi a1, a1, 24 ; SPILL-O0-NEXT: vl1r.v v8, (a1) # 
Unknown-size Folded Reload -; SPILL-O0-NEXT: # kill: def $x11 killed $x10 +; SPILL-O0-NEXT: # kill: def $x11 killed $x10 ; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; SPILL-O0-NEXT: vfadd.vv v8, v8, v25 Index: llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll +++ llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll @@ -107,9 +107,9 @@ ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: add a2, s1, a2 -; CHECK-NEXT: addi a2, a2, 96 +; CHECK-NEXT: addi a2, a2, 104 ; CHECK-NEXT: vl1re64.v v25, (a2) -; CHECK-NEXT: addi a2, s1, 96 +; CHECK-NEXT: addi a2, s1, 104 ; CHECK-NEXT: vl2re64.v v26, (a2) ; CHECK-NEXT: lw a2, 64(s1) ; CHECK-NEXT: slli a1, a1, 2 Index: llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir +++ llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir @@ -9,18 +9,19 @@ define void @foo() #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: addi sp, sp, -16 - ; CHECK-NEXT: sw s9, 12(sp) # 4-byte Folded Spill + ; CHECK-NEXT: addi sp, sp, -32 + ; CHECK-NEXT: sw s9, 28(sp) # 4-byte Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: sw a0, 8(sp) # 4-byte Folded Spill - ; CHECK-NEXT: vs2r.v v30, (sp) # Unknown-size Folded Spill + ; CHECK-NEXT: addi a0, sp, 16 + ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 - ; CHECK-NEXT: lw s9, 12(sp) # 4-byte Folded Reload - ; CHECK-NEXT: addi sp, sp, 16 + ; CHECK-NEXT: lw s9, 28(sp) # 4-byte Folded Reload + ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret entry: ret void Index: llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir 
=================================================================== --- llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir +++ llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir @@ -15,7 +15,7 @@ ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: sd a0, 16(sp) # 8-byte Folded Spill - ; CHECK-NEXT: addi a0, sp, 16 + ; CHECK-NEXT: addi a0, sp, 24 ; CHECK-NEXT: vs2r.v v30, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1