diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -303,21 +303,43 @@ .addReg(SrcReg) .addImm(Val) .setMIFlag(Flag); - } else { - unsigned Opc = RISCV::ADD; - bool IsSub = Val < 0; - if (IsSub) { - Val = -Val; - Opc = RISCV::SUB; - } + return; + } - Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag); - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + // Try to split the offset across two ADDIs. We need to keep the stack pointer + // aligned after each ADDI. We need to determine the maximum value we can put + // in each ADDI. In the negative direction, we can use -2048 which is always + // sufficiently aligned. In the positive direction, we need to find the + // largest 12-bit immediate that is aligned. Exclude -4096 since it can be + // created with LUI. + assert(getStackAlign().value() < 2048 && "Stack alignment too large"); + int64_t MaxPosAdjStep = 2048 - getStackAlign().value(); + if (Val > -4096 && Val <= (2 * MaxPosAdjStep)) { + int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep; + Val -= FirstAdj; + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DestReg) .addReg(SrcReg) - .addReg(ScratchReg, RegState::Kill) + .addImm(FirstAdj) + .setMIFlag(Flag); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(Val) .setMIFlag(Flag); + return; } + + unsigned Opc = RISCV::ADD; + if (Val < 0) { + Val = -Val; + Opc = RISCV::SUB; + } + + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag); + BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + .addReg(SrcReg) + .addReg(ScratchReg, RegState::Kill) + .setMIFlag(Flag); } // Returns the register used to hold the frame pointer. diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -153,9 +153,8 @@ ; CHECK-NEXT: PseudoBR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: $x10 = frame-destroy LUI 1 - ; CHECK-NEXT: $x10 = frame-destroy ADDIW killed $x10, -1792 - ; CHECK-NEXT: $x2 = frame-destroy SUB $x8, killed $x10 + ; CHECK-NEXT: $x2 = frame-destroy ADDI $x8, -2048 + ; CHECK-NEXT: $x2 = frame-destroy ADDI killed $x2, -256 ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 272 ; CHECK-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.3) ; CHECK-NEXT: $x8 = LD $x2, 2016 :: (load (s64) from %stack.4) diff --git a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir --- a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir +++ b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir @@ -32,9 +32,8 @@ ; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: ld a0, 8(sp) ; CHECK-NEXT: call spillslot@plt - ; CHECK-NEXT: lui a0, 1 - ; CHECK-NEXT: addiw a0, a0, -1792 - ; CHECK-NEXT: sub sp, s0, a0 + ; CHECK-NEXT: addi sp, s0, -2048 + ; CHECK-NEXT: addi sp, sp, -256 ; CHECK-NEXT: addi sp, sp, 272 ; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll --- a/llvm/test/CodeGen/RISCV/stack-realignment.ll +++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll @@ -453,9 +453,8 @@ ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: addi s0, sp, 2032 ; RV32I-NEXT: .cfi_def_cfa s0, 0 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -2032 -; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: addi sp, sp, -2048 +; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: andi sp, sp, -2048 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a0, a0, -2048 @@ -463,9 +462,8 @@ ; RV32I-NEXT: call callee@plt ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: sub sp, s0, a0 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -2032 -; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 2032 @@ -481,9 +479,8 @@ ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: addi s0, sp, 2032 ; RV64I-NEXT: .cfi_def_cfa s0, 0 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -2032 -; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: addi sp, sp, -2048 +; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: andi sp, sp, -2048 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a0, a0, -2048 @@ -491,9 +488,8 @@ ; RV64I-NEXT: call callee@plt ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: sub sp, s0, a0 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -2032 -; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 2032