diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -40,8 +40,16 @@ uint64_t FrameSize = MFI.getStackSize(); // Get the alignment. - uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment() - : getStackAlignment(); + unsigned StackAlign = getStackAlignment(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment()); + FrameSize += (MaxStackAlign - StackAlign); + StackAlign = MaxStackAlign; + } + + // Set Max Call Frame Size + uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign); + MFI.setMaxCallFrameSize(MaxCallSize); // Make sure the frame is aligned. FrameSize = alignTo(FrameSize, StackAlign); @@ -101,6 +109,12 @@ const RISCVInstrInfo *TII = STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); + if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) { + report_fatal_error( + "RISC-V backend can't currently handle functions that need stack " + "realignment and have variable sized objects"); + } + unsigned FPReg = getFPReg(STI); unsigned SPReg = getSPReg(STI); @@ -158,6 +172,29 @@ nullptr, RI->getDwarfRegNum(FPReg, true), 0)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + + // Realign Stack + const RISCVRegisterInfo *RI = STI.getRegisterInfo(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxAlignment = MFI.getMaxAlignment(); + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + if (isInt<12>(-(int)MaxAlignment)) { + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg) + .addReg(SPReg) + .addImm(-(int)MaxAlignment); + } + else { + unsigned ShiftAmount = countTrailingZeros(MaxAlignment); + unsigned VR = MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR) + .addReg(SPReg) + .addImm(ShiftAmount); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg) + .addReg(VR) + .addImm(ShiftAmount); + } + } } } @@ -257,6 +294,13 @@ if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = RISCV::X2; Offset += MF.getFrameInfo().getStackSize(); + } else if (RI->needsStackRealignment(MF)) { + assert(!MFI.hasVarSizedObjects() && + "Unexpected combination of stack realignment and varsized objects"); + // If the stack was realigned, the frame pointer is set in order to allow + // SP to be restored, but we still stack objects using SP. + FrameReg = RISCV::X2; + Offset += MF.getFrameInfo().getStackSize(); } else { FrameReg = RI->getFrameRegister(MF); if (hasFP(MF)) diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-unsupported.ll b/llvm/test/CodeGen/RISCV/stack-realignment-unsupported.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/stack-realignment-unsupported.ll @@ -0,0 +1,13 @@ +; RUN: not llc -mtriple=riscv32 < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=riscv64 < %s 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: RISC-V backend can't currently handle functions that need stack realignment and have variable sized objects + +declare void @callee(i8*, i32*) + +define void @caller(i32 %n) nounwind { + %1 = alloca i8, i32 %n + %2 = alloca i32, align 64 + call void @callee(i8* %1, i32 *%2) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll @@ -0,0 +1,627 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I + +declare void @callee(i8*) + +define void @caller32() nounwind { +; RV32I-LABEL: caller32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) +; RV32I-NEXT: sw s0, 56(sp) +; RV32I-NEXT: addi s0, sp, 64 +; RV32I-NEXT: andi sp, sp, -32 +; RV32I-NEXT: addi a0, sp, 32 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -64 +; RV32I-NEXT: lw s0, 56(sp) +; RV32I-NEXT: lw ra, 60(sp) +; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: sd ra, 56(sp) +; RV64I-NEXT: sd s0, 48(sp) +; RV64I-NEXT: addi s0, sp, 64 +; RV64I-NEXT: andi sp, sp, -32 +; RV64I-NEXT: addi a0, sp, 32 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -64 +; RV64I-NEXT: ld s0, 48(sp) +; RV64I-NEXT: ld ra, 56(sp) +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: ret + %1 = alloca i8, align 32 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign32() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 32 + call void @callee(i8* %1) + ret void +} + +define void @caller64() nounwind { +; RV32I-LABEL: caller64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -128 +; RV32I-NEXT: sw ra, 124(sp) +; RV32I-NEXT: sw s0, 120(sp) +; RV32I-NEXT: addi s0, sp, 128 +; RV32I-NEXT: andi sp, sp, -64 +; RV32I-NEXT: addi a0, sp, 64 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -128 +; RV32I-NEXT: lw s0, 120(sp) +; RV32I-NEXT: lw ra, 124(sp) +; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -128 +; RV64I-NEXT: sd ra, 120(sp) +; RV64I-NEXT: sd s0, 112(sp) +; RV64I-NEXT: addi s0, sp, 128 +; RV64I-NEXT: andi sp, sp, -64 +; RV64I-NEXT: addi a0, sp, 64 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -128 +; RV64I-NEXT: ld s0, 112(sp) +; RV64I-NEXT: ld ra, 120(sp) +; RV64I-NEXT: addi sp, sp, 128 +; RV64I-NEXT: ret + %1 = alloca i8, align 64 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign64() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 64 + call void @callee(i8* %1) + ret void +} + +define void @caller128() nounwind { +; RV32I-LABEL: caller128: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -256 +; RV32I-NEXT: sw ra, 252(sp) +; RV32I-NEXT: sw s0, 248(sp) +; RV32I-NEXT: addi s0, sp, 256 +; RV32I-NEXT: andi sp, sp, -128 +; RV32I-NEXT: addi a0, sp, 128 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -256 +; RV32I-NEXT: lw s0, 248(sp) +; RV32I-NEXT: lw ra, 252(sp) +; RV32I-NEXT: addi sp, sp, 256 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller128: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -256 +; RV64I-NEXT: sd ra, 248(sp) +; RV64I-NEXT: sd s0, 240(sp) +; RV64I-NEXT: addi s0, sp, 256 +; RV64I-NEXT: andi sp, sp, -128 +; RV64I-NEXT: addi a0, sp, 128 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -256 +; RV64I-NEXT: ld s0, 240(sp) +; RV64I-NEXT: ld ra, 248(sp) +; RV64I-NEXT: addi sp, sp, 256 +; RV64I-NEXT: ret + %1 = alloca i8, align 128 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign128() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign128: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign128: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 128 + call void @callee(i8* %1) + ret void +} + +define void @caller256() nounwind { +; RV32I-LABEL: caller256: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -512 +; RV32I-NEXT: sw ra, 508(sp) +; RV32I-NEXT: sw s0, 504(sp) +; RV32I-NEXT: addi s0, sp, 512 +; RV32I-NEXT: andi sp, sp, -256 +; RV32I-NEXT: addi a0, sp, 256 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -512 +; RV32I-NEXT: lw s0, 504(sp) +; RV32I-NEXT: lw ra, 508(sp) +; RV32I-NEXT: addi sp, sp, 512 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller256: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -512 +; RV64I-NEXT: sd ra, 504(sp) +; RV64I-NEXT: sd s0, 496(sp) +; RV64I-NEXT: addi s0, sp, 512 +; RV64I-NEXT: andi sp, sp, -256 +; RV64I-NEXT: addi a0, sp, 256 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -512 +; RV64I-NEXT: ld s0, 496(sp) +; RV64I-NEXT: ld ra, 504(sp) +; RV64I-NEXT: addi sp, sp, 512 +; RV64I-NEXT: ret + %1 = alloca i8, align 256 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign256() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign256: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign256: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 256 + call void @callee(i8* %1) + ret void +} + +define void @caller512() nounwind { +; RV32I-LABEL: caller512: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -1536 +; RV32I-NEXT: sw ra, 1532(sp) +; RV32I-NEXT: sw s0, 1528(sp) +; RV32I-NEXT: addi s0, sp, 1536 +; RV32I-NEXT: andi sp, sp, -512 +; RV32I-NEXT: addi a0, sp, 1024 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -1536 +; RV32I-NEXT: lw s0, 1528(sp) +; RV32I-NEXT: lw ra, 1532(sp) +; RV32I-NEXT: addi sp, sp, 1536 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller512: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -1536 +; RV64I-NEXT: sd ra, 1528(sp) +; RV64I-NEXT: sd s0, 1520(sp) +; RV64I-NEXT: addi s0, sp, 1536 +; RV64I-NEXT: andi sp, sp, -512 +; RV64I-NEXT: addi a0, sp, 1024 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -1536 +; RV64I-NEXT: ld s0, 1520(sp) +; RV64I-NEXT: ld ra, 1528(sp) +; RV64I-NEXT: addi sp, sp, 1536 +; RV64I-NEXT: ret + %1 = alloca i8, align 512 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign512() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign512: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign512: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 512 + call void @callee(i8* %1) + ret void +} + +define void @caller1024() nounwind { +; RV32I-LABEL: caller1024: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1024 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1028 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw ra, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1032 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw s0, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1024 +; RV32I-NEXT: add s0, sp, a0 +; RV32I-NEXT: andi sp, sp, -1024 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: call callee +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1024 +; RV32I-NEXT: sub sp, s0, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1032 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw s0, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1028 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw ra, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1024 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller1024: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1024 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1032 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd ra, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1040 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd s0, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1024 +; RV64I-NEXT: add s0, sp, a0 +; RV64I-NEXT: andi sp, sp, -1024 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: call callee +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1024 +; RV64I-NEXT: sub sp, s0, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1040 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld s0, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1032 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld ra, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1024 +; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ret + %1 = alloca i8, align 1024 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign1024() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign1024: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign1024: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 1024 + call void @callee(i8* %1) + ret void +} + +define void @caller2048() nounwind { +; RV32I-LABEL: caller2048: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 2044 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw ra, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 2040 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw s0, 0(a0) +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: add s0, sp, a0 +; RV32I-NEXT: andi sp, sp, -2048 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: call callee +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: sub sp, s0, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 2040 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw s0, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 2044 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw ra, 0(a0) +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller2048: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, 2040 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd ra, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, 2032 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd s0, 0(a0) +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: add s0, sp, a0 +; RV64I-NEXT: andi sp, sp, -2048 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: call callee +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: sub sp, s0, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, 2032 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld s0, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, 2040 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld ra, 0(a0) +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ret + %1 = alloca i8, align 2048 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign2048() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign2048: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign2048: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 2048 + call void @callee(i8* %1) + ret void +} + +define void @caller4096() nounwind { +; RV32I-LABEL: caller4096: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -4 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw ra, 0(a0) +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -8 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw s0, 0(a0) +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: add s0, sp, a0 +; RV32I-NEXT: srli a0, sp, 12 +; RV32I-NEXT: slli sp, a0, 12 +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: call callee +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: sub sp, s0, a0 +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -8 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw s0, 0(a0) +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -4 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw ra, 0(a0) +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller4096: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addi a0, a0, -8 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd ra, 0(a0) +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addi a0, a0, -16 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd s0, 0(a0) +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: add s0, sp, a0 +; RV64I-NEXT: srli a0, sp, 12 +; RV64I-NEXT: slli sp, a0, 12 +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: call callee +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: sub sp, s0, a0 +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addi a0, a0, -16 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld s0, 0(a0) +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addi a0, a0, -8 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld ra, 0(a0) +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ret + %1 = alloca i8, align 4096 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign4096() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign4096: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign4096: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 4096 + call void @callee(i8* %1) + ret void +}