Index: lib/Target/RISCV/RISCVFrameLowering.h =================================================================== --- lib/Target/RISCV/RISCVFrameLowering.h +++ lib/Target/RISCV/RISCVFrameLowering.h @@ -45,6 +45,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + // Get the first stack adjustment amount for SplitSPAdjust. + // Return 0 if we don't want to split the SP adjustment in prologue and + // epilogue. + uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const; + protected: const RISCVSubtarget &STI; Index: lib/Target/RISCV/RISCVFrameLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVFrameLowering.cpp +++ lib/Target/RISCV/RISCVFrameLowering.cpp @@ -131,6 +131,11 @@ if (StackSize == 0 && !MFI.adjustsStack()) return; + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + // Split the SP adjustment to reduce the offsets of callee saved spill. + if (FirstSPAdjustAmount) + StackSize = FirstSPAdjustAmount; + // Allocate space on the stack if necessary. adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); @@ -170,7 +175,23 @@ nullptr, RI->getDwarfRegNum(FPReg, true), 0)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + } + // Emit the second SP adjustment after saving callee saved registers. 
+ if (FirstSPAdjustAmount) { + uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + assert(SecondSPAdjustAmount > 0 && + "SecondSPAdjustAmount should be greater than zero"); + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount, + MachineInstr::FrameSetup); + // Emit ".cfi_def_cfa_offset StackSize" + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize())); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + + if (hasFP(MF)) { // Realign Stack const RISCVRegisterInfo *RI = STI.getRegisterInfo(); if (RI->needsStackRealignment(MF)) { @@ -224,6 +245,24 @@ MachineInstr::FrameDestroy); } + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + if (FirstSPAdjustAmount) { + uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + assert(SecondSPAdjustAmount > 0 && + "SecondSPAdjustAmount should be greater than zero"); + + adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, SecondSPAdjustAmount, + MachineInstr::FrameDestroy); + + // Emit ".cfi_def_cfa_offset FirstSPAdjustAmount" + unsigned CFIIndex = + MF.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, + -FirstSPAdjustAmount)); + BuildMI(MBB, LastFrameDestroy, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + if (hasFP(MF)) { // To find the instruction restoring FP from stack. 
for (auto &I = LastFrameDestroy; I != MBBI; ++I) { @@ -256,6 +295,9 @@ .addCFIIndex(CFIIndex); } + if (FirstSPAdjustAmount) + StackSize = FirstSPAdjustAmount; + // Deallocate stack adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); @@ -284,6 +326,8 @@ int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + MFI.getOffsetAdjustment(); + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + if (CSI.size()) { MinCSFI = CSI[0].getFrameIdx(); MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); @@ -291,7 +335,11 @@ if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = RISCV::X2; - Offset += MF.getFrameInfo().getStackSize(); + + if (FirstSPAdjustAmount) + Offset += FirstSPAdjustAmount; + else + Offset += MF.getFrameInfo().getStackSize(); } else if (RI->needsStackRealignment(MF)) { assert(!MFI.hasVarSizedObjects() && "Unexpected combination of stack realignment and varsized objects"); @@ -404,3 +452,39 @@ return MBB.erase(MI); } + +// We would like to split the SP adjustment to reduce the prologue/epilogue +// to the following instructions. In this way, the offsets of the callee-saved +// registers can fit in a single store. +// add sp,sp,-2032 +// sw ra,2028(sp) +// sw s0,2024(sp) +// sw s1,2020(sp) +// sw s3,2012(sp) +// sw s4,2008(sp) +// add sp,sp,-64 +uint64_t +RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); + uint64_t StackSize = MFI.getStackSize(); + uint64_t StackAlign = getStackAlignment(); + + // FIXME: Disable SplitSPAdjust when save-restore libcalls are enabled, once + // that patch lands. The callee saved registers will be pushed by the + // save-restore libcalls, so we don't have to split the SP adjustment + // in this case. + // + // Return the FirstSPAdjustAmount if the StackSize cannot fit in a signed + // 12-bit immediate and some callee saved register needs to be pushed. 
+ if (!isInt<12>(StackSize) && (CSI.size() > 0)) { + // FirstSPAdjustAmount is chosen as (2048 - StackAlign) + // because 2048 would cause sp = sp + 2048 in the epilogue to be split into + // multiple instructions. An offset smaller than 2048 can fit in a single + // load/store instruction and we have to stick with the stack alignment. + // 2048 is 16-byte aligned. The stack alignment for RV32 and RV64 is 16, + // for RV32E is 4. So (2048 - StackAlign) will satisfy the stack alignment. + return 2048 - StackAlign; + } + return 0; +} Index: test/CodeGen/RISCV/large-stack.ll =================================================================== --- test/CodeGen/RISCV/large-stack.ll +++ test/CodeGen/RISCV/large-stack.ll @@ -6,44 +6,44 @@ ; TODO: the quality of the generated code is poor -define void @test() nounwind { +define void @test() { ; RV32I-FPELIM-LABEL: test: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lui a0, 74565 ; RV32I-FPELIM-NEXT: addi a0, a0, 1664 ; RV32I-FPELIM-NEXT: sub sp, sp, a0 +; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 305419904 ; RV32I-FPELIM-NEXT: lui a0, 74565 ; RV32I-FPELIM-NEXT: addi a0, a0, 1664 ; RV32I-FPELIM-NEXT: add sp, sp, a0 +; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 0 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: test: ; RV32I-WITHFP: # %bb.0: +; RV32I-WITHFP-NEXT: addi sp, sp, -2032 +; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-WITHFP-NEXT: sw ra, 2028(sp) +; RV32I-WITHFP-NEXT: sw s0, 2024(sp) +; RV32I-WITHFP-NEXT: .cfi_offset ra, -4 +; RV32I-WITHFP-NEXT: .cfi_offset s0, -8 +; RV32I-WITHFP-NEXT: addi s0, sp, 2032 +; RV32I-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; RV32I-WITHFP-NEXT: lui a0, 74565 -; RV32I-WITHFP-NEXT: addi a0, a0, 1680 +; RV32I-WITHFP-NEXT: addi a0, a0, -352 ; RV32I-WITHFP-NEXT: sub sp, sp, a0 +; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 305419920 ; RV32I-WITHFP-NEXT: lui a0, 74565 -; RV32I-WITHFP-NEXT: addi a0, a0, 1676 -; RV32I-WITHFP-NEXT: add a0, sp, a0 -; RV32I-WITHFP-NEXT: sw ra, 0(a0) -; RV32I-WITHFP-NEXT: lui a0, 74565 -; 
RV32I-WITHFP-NEXT: addi a0, a0, 1672 -; RV32I-WITHFP-NEXT: add a0, sp, a0 -; RV32I-WITHFP-NEXT: sw s0, 0(a0) -; RV32I-WITHFP-NEXT: lui a0, 74565 -; RV32I-WITHFP-NEXT: addi a0, a0, 1680 -; RV32I-WITHFP-NEXT: add s0, sp, a0 -; RV32I-WITHFP-NEXT: lui a0, 74565 -; RV32I-WITHFP-NEXT: addi a0, a0, 1672 -; RV32I-WITHFP-NEXT: add a0, sp, a0 -; RV32I-WITHFP-NEXT: lw s0, 0(a0) -; RV32I-WITHFP-NEXT: lui a0, 74565 -; RV32I-WITHFP-NEXT: addi a0, a0, 1676 -; RV32I-WITHFP-NEXT: add a0, sp, a0 -; RV32I-WITHFP-NEXT: lw ra, 0(a0) -; RV32I-WITHFP-NEXT: lui a0, 74565 -; RV32I-WITHFP-NEXT: addi a0, a0, 1680 +; RV32I-WITHFP-NEXT: addi a0, a0, -352 ; RV32I-WITHFP-NEXT: add sp, sp, a0 +; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-WITHFP-NEXT: lw s0, 2024(sp) +; RV32I-WITHFP-NEXT: .cfi_def_cfa sp, 305419920 +; RV32I-WITHFP-NEXT: lw ra, 2028(sp) +; RV32I-WITHFP-NEXT: .cfi_restore ra +; RV32I-WITHFP-NEXT: .cfi_restore s0 +; RV32I-WITHFP-NEXT: addi sp, sp, 2032 +; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 0 ; RV32I-WITHFP-NEXT: ret %tmp = alloca [ 305419896 x i8 ] , align 4 ret void @@ -52,20 +52,19 @@ ; This test case artificially produces register pressure which should force ; use of the emergency spill slot. 
-define void @test_emergency_spill_slot(i32 %a) nounwind { +define void @test_emergency_spill_slot(i32 %a) { ; RV32I-FPELIM-LABEL: test_emergency_spill_slot: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: lui a1, 98 -; RV32I-FPELIM-NEXT: addi a1, a1, -1392 +; RV32I-FPELIM-NEXT: addi sp, sp, -2032 +; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-FPELIM-NEXT: sw s0, 2028(sp) +; RV32I-FPELIM-NEXT: sw s1, 2024(sp) +; RV32I-FPELIM-NEXT: .cfi_offset s0, -4 +; RV32I-FPELIM-NEXT: .cfi_offset s1, -8 +; RV32I-FPELIM-NEXT: lui a1, 97 +; RV32I-FPELIM-NEXT: addi a1, a1, 672 ; RV32I-FPELIM-NEXT: sub sp, sp, a1 -; RV32I-FPELIM-NEXT: lui a1, 98 -; RV32I-FPELIM-NEXT: addi a1, a1, -1396 -; RV32I-FPELIM-NEXT: add a1, sp, a1 -; RV32I-FPELIM-NEXT: sw s0, 0(a1) -; RV32I-FPELIM-NEXT: lui a1, 98 -; RV32I-FPELIM-NEXT: addi a1, a1, -1400 -; RV32I-FPELIM-NEXT: add a1, sp, a1 -; RV32I-FPELIM-NEXT: sw s1, 0(a1) +; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 400016 ; RV32I-FPELIM-NEXT: lui a1, 78 ; RV32I-FPELIM-NEXT: addi a1, a1, 512 ; RV32I-FPELIM-NEXT: addi a2, sp, 8 @@ -77,43 +76,36 @@ ; RV32I-FPELIM-NEXT: #APP ; RV32I-FPELIM-NEXT: nop ; RV32I-FPELIM-NEXT: #NO_APP -; RV32I-FPELIM-NEXT: lui a0, 98 -; RV32I-FPELIM-NEXT: addi a0, a0, -1400 -; RV32I-FPELIM-NEXT: add a0, sp, a0 -; RV32I-FPELIM-NEXT: lw s1, 0(a0) -; RV32I-FPELIM-NEXT: lui a0, 98 -; RV32I-FPELIM-NEXT: addi a0, a0, -1396 -; RV32I-FPELIM-NEXT: add a0, sp, a0 -; RV32I-FPELIM-NEXT: lw s0, 0(a0) -; RV32I-FPELIM-NEXT: lui a0, 98 -; RV32I-FPELIM-NEXT: addi a0, a0, -1392 +; RV32I-FPELIM-NEXT: lui a0, 97 +; RV32I-FPELIM-NEXT: addi a0, a0, 672 ; RV32I-FPELIM-NEXT: add sp, sp, a0 +; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-FPELIM-NEXT: lw s1, 2024(sp) +; RV32I-FPELIM-NEXT: lw s0, 2028(sp) +; RV32I-FPELIM-NEXT: .cfi_restore s0 +; RV32I-FPELIM-NEXT: .cfi_restore s1 +; RV32I-FPELIM-NEXT: addi sp, sp, 2032 +; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 0 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: test_emergency_spill_slot: ; 
RV32I-WITHFP: # %bb.0: -; RV32I-WITHFP-NEXT: lui a1, 98 -; RV32I-WITHFP-NEXT: addi a1, a1, -1376 +; RV32I-WITHFP-NEXT: addi sp, sp, -2032 +; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-WITHFP-NEXT: sw ra, 2028(sp) +; RV32I-WITHFP-NEXT: sw s0, 2024(sp) +; RV32I-WITHFP-NEXT: sw s1, 2020(sp) +; RV32I-WITHFP-NEXT: sw s2, 2016(sp) +; RV32I-WITHFP-NEXT: .cfi_offset ra, -4 +; RV32I-WITHFP-NEXT: .cfi_offset s0, -8 +; RV32I-WITHFP-NEXT: .cfi_offset s1, -12 +; RV32I-WITHFP-NEXT: .cfi_offset s2, -16 +; RV32I-WITHFP-NEXT: addi s0, sp, 2032 +; RV32I-WITHFP-NEXT: .cfi_def_cfa s0, 0 +; RV32I-WITHFP-NEXT: lui a1, 97 +; RV32I-WITHFP-NEXT: addi a1, a1, 688 ; RV32I-WITHFP-NEXT: sub sp, sp, a1 -; RV32I-WITHFP-NEXT: lui a1, 98 -; RV32I-WITHFP-NEXT: addi a1, a1, -1380 -; RV32I-WITHFP-NEXT: add a1, sp, a1 -; RV32I-WITHFP-NEXT: sw ra, 0(a1) -; RV32I-WITHFP-NEXT: lui a1, 98 -; RV32I-WITHFP-NEXT: addi a1, a1, -1384 -; RV32I-WITHFP-NEXT: add a1, sp, a1 -; RV32I-WITHFP-NEXT: sw s0, 0(a1) -; RV32I-WITHFP-NEXT: lui a1, 98 -; RV32I-WITHFP-NEXT: addi a1, a1, -1388 -; RV32I-WITHFP-NEXT: add a1, sp, a1 -; RV32I-WITHFP-NEXT: sw s1, 0(a1) -; RV32I-WITHFP-NEXT: lui a1, 98 -; RV32I-WITHFP-NEXT: addi a1, a1, -1392 -; RV32I-WITHFP-NEXT: add a1, sp, a1 -; RV32I-WITHFP-NEXT: sw s2, 0(a1) -; RV32I-WITHFP-NEXT: lui a1, 98 -; RV32I-WITHFP-NEXT: addi a1, a1, -1376 -; RV32I-WITHFP-NEXT: add s0, sp, a1 +; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 400032 ; RV32I-WITHFP-NEXT: lui a1, 78 ; RV32I-WITHFP-NEXT: addi a1, a1, 512 ; RV32I-WITHFP-NEXT: lui a2, 1048478 @@ -128,25 +120,21 @@ ; RV32I-WITHFP-NEXT: #APP ; RV32I-WITHFP-NEXT: nop ; RV32I-WITHFP-NEXT: #NO_APP -; RV32I-WITHFP-NEXT: lui a0, 98 -; RV32I-WITHFP-NEXT: addi a0, a0, -1392 -; RV32I-WITHFP-NEXT: add a0, sp, a0 -; RV32I-WITHFP-NEXT: lw s2, 0(a0) -; RV32I-WITHFP-NEXT: lui a0, 98 -; RV32I-WITHFP-NEXT: addi a0, a0, -1388 -; RV32I-WITHFP-NEXT: add a0, sp, a0 -; RV32I-WITHFP-NEXT: lw s1, 0(a0) -; RV32I-WITHFP-NEXT: lui a0, 98 -; RV32I-WITHFP-NEXT: addi 
a0, a0, -1384 -; RV32I-WITHFP-NEXT: add a0, sp, a0 -; RV32I-WITHFP-NEXT: lw s0, 0(a0) -; RV32I-WITHFP-NEXT: lui a0, 98 -; RV32I-WITHFP-NEXT: addi a0, a0, -1380 -; RV32I-WITHFP-NEXT: add a0, sp, a0 -; RV32I-WITHFP-NEXT: lw ra, 0(a0) -; RV32I-WITHFP-NEXT: lui a0, 98 -; RV32I-WITHFP-NEXT: addi a0, a0, -1376 +; RV32I-WITHFP-NEXT: lui a0, 97 +; RV32I-WITHFP-NEXT: addi a0, a0, 688 ; RV32I-WITHFP-NEXT: add sp, sp, a0 +; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-WITHFP-NEXT: lw s2, 2016(sp) +; RV32I-WITHFP-NEXT: lw s1, 2020(sp) +; RV32I-WITHFP-NEXT: lw s0, 2024(sp) +; RV32I-WITHFP-NEXT: .cfi_def_cfa sp, 400032 +; RV32I-WITHFP-NEXT: lw ra, 2028(sp) +; RV32I-WITHFP-NEXT: .cfi_restore ra +; RV32I-WITHFP-NEXT: .cfi_restore s0 +; RV32I-WITHFP-NEXT: .cfi_restore s1 +; RV32I-WITHFP-NEXT: .cfi_restore s2 +; RV32I-WITHFP-NEXT: addi sp, sp, 2032 +; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 0 ; RV32I-WITHFP-NEXT: ret %data = alloca [ 100000 x i32 ] , align 4 %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %data, i32 0, i32 80000 Index: test/CodeGen/RISCV/rv64-large-stack.ll =================================================================== --- test/CodeGen/RISCV/rv64-large-stack.ll +++ test/CodeGen/RISCV/rv64-large-stack.ll @@ -7,26 +7,22 @@ define void @foo() nounwind { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -2032 +; CHECK-NEXT: sd ra, 2024(sp) ; CHECK-NEXT: lui a0, 95 ; CHECK-NEXT: addiw a0, a0, 1505 ; CHECK-NEXT: slli a0, a0, 13 -; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: addi a0, a0, -2000 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: lui a0, 781250 -; CHECK-NEXT: addiw a0, a0, 24 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: sd ra, 0(a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: call baz -; CHECK-NEXT: lui a0, 781250 -; CHECK-NEXT: addiw a0, a0, 24 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: ld ra, 0(a0) ; CHECK-NEXT: lui a0, 95 ; CHECK-NEXT: addiw a0, a0, 1505 ; CHECK-NEXT: slli a0, a0, 13 -; CHECK-NEXT: addi a0, a0, 
32 +; CHECK-NEXT: addi a0, a0, -2000 ; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ld ra, 2024(sp) +; CHECK-NEXT: addi sp, sp, 2032 ; CHECK-NEXT: ret entry: %w = alloca [100000000 x { fp128, fp128 }], align 16 Index: test/CodeGen/RISCV/split-sp-adjust.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/split-sp-adjust.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I + +; The stack size is 2048 and the SP adjustment will be split. +define i32 @SplitSP() nounwind { +; RV32I-LABEL: SplitSP: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: sw ra, 2028(sp) +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: call foo +; RV32I-NEXT: mv a0, zero +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: lw ra, 2028(sp) +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: ret +entry: + %xx = alloca [2028 x i8], align 1 + %0 = getelementptr inbounds [2028 x i8], [2028 x i8]* %xx, i32 0, i32 0 + %call = call i32 @foo(i8* nonnull %0) + ret i32 0 +} + +; The stack size is 2032 and the SP adjustment will not be split. 
+define i32 @NoSplitSP() nounwind { +; RV32I-LABEL: NoSplitSP: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: sw ra, 2028(sp) +; RV32I-NEXT: addi a0, sp, 4 +; RV32I-NEXT: call foo +; RV32I-NEXT: mv a0, zero +; RV32I-NEXT: lw ra, 2028(sp) +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: ret +entry: + %xx = alloca [2024 x i8], align 1 + %0 = getelementptr inbounds [2024 x i8], [2024 x i8]* %xx, i32 0, i32 0 + %call = call i32 @foo(i8* nonnull %0) + ret i32 0 +} + +declare i32 @foo(i8*) Index: test/CodeGen/RISCV/stack-realignment.ll =================================================================== --- test/CodeGen/RISCV/stack-realignment.ll +++ test/CodeGen/RISCV/stack-realignment.ll @@ -309,20 +309,11 @@ define void @caller1024() nounwind { ; RV32I-LABEL: caller1024: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -1024 -; RV32I-NEXT: sub sp, sp, a0 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -1028 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: sw ra, 0(a0) -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -1032 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: sw s0, 0(a0) -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -1024 -; RV32I-NEXT: add s0, sp, a0 +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: sw ra, 2028(sp) +; RV32I-NEXT: sw s0, 2024(sp) +; RV32I-NEXT: addi s0, sp, 2032 +; RV32I-NEXT: addi sp, sp, -1040 ; RV32I-NEXT: andi sp, sp, -1024 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a0, a0, -2048 @@ -332,35 +323,19 @@ ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a0, a0, -1024 ; RV32I-NEXT: sub sp, s0, a0 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -1032 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: lw s0, 0(a0) -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -1028 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: lw ra, 0(a0) -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, -1024 -; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: addi sp, sp, 1040 +; RV32I-NEXT: lw s0, 2024(sp) +; RV32I-NEXT: 
lw ra, 2028(sp) +; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; ; RV64I-LABEL: caller1024: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -1024 -; RV64I-NEXT: sub sp, sp, a0 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -1032 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: sd ra, 0(a0) -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -1040 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: sd s0, 0(a0) -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -1024 -; RV64I-NEXT: add s0, sp, a0 +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: sd ra, 2024(sp) +; RV64I-NEXT: sd s0, 2016(sp) +; RV64I-NEXT: addi s0, sp, 2032 +; RV64I-NEXT: addi sp, sp, -1040 ; RV64I-NEXT: andi sp, sp, -1024 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a0, a0, -2048 @@ -370,17 +345,10 @@ ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a0, a0, -1024 ; RV64I-NEXT: sub sp, s0, a0 -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -1040 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: ld s0, 0(a0) -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -1032 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: ld ra, 0(a0) -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, -1024 -; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: addi sp, sp, 1040 +; RV64I-NEXT: ld s0, 2016(sp) +; RV64I-NEXT: ld ra, 2024(sp) +; RV64I-NEXT: addi sp, sp, 2032 ; RV64I-NEXT: ret %1 = alloca i8, align 1024 call void @callee(i8* %1) @@ -415,20 +383,13 @@ define void @caller2048() nounwind { ; RV32I-LABEL: caller2048: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a0, 2 -; RV32I-NEXT: addi a0, a0, -2048 -; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: sw ra, 2028(sp) +; RV32I-NEXT: sw s0, 2024(sp) +; RV32I-NEXT: addi s0, sp, 2032 ; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, 2044 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: sw ra, 0(a0) -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, 2040 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: sw s0, 0(a0) -; RV32I-NEXT: lui a0, 2 -; 
RV32I-NEXT: addi a0, a0, -2048 -; RV32I-NEXT: add s0, sp, a0 +; RV32I-NEXT: addi a0, a0, 16 +; RV32I-NEXT: sub sp, sp, a0 ; RV32I-NEXT: andi sp, sp, -2048 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: add a0, sp, a0 @@ -438,34 +399,22 @@ ; RV32I-NEXT: addi a0, a0, -2048 ; RV32I-NEXT: sub sp, s0, a0 ; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, 2040 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: lw s0, 0(a0) -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a0, a0, 2044 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: lw ra, 0(a0) -; RV32I-NEXT: lui a0, 2 -; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: addi a0, a0, 16 ; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: lw s0, 2024(sp) +; RV32I-NEXT: lw ra, 2028(sp) +; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; ; RV64I-LABEL: caller2048: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a0, 2 -; RV64I-NEXT: addiw a0, a0, -2048 -; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: sd ra, 2024(sp) +; RV64I-NEXT: sd s0, 2016(sp) +; RV64I-NEXT: addi s0, sp, 2032 ; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, 2040 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: sd ra, 0(a0) -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, 2032 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: sd s0, 0(a0) -; RV64I-NEXT: lui a0, 2 -; RV64I-NEXT: addiw a0, a0, -2048 -; RV64I-NEXT: add s0, sp, a0 +; RV64I-NEXT: addiw a0, a0, 16 +; RV64I-NEXT: sub sp, sp, a0 ; RV64I-NEXT: andi sp, sp, -2048 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: add a0, sp, a0 @@ -475,16 +424,11 @@ ; RV64I-NEXT: addiw a0, a0, -2048 ; RV64I-NEXT: sub sp, s0, a0 ; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, 2032 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: ld s0, 0(a0) -; RV64I-NEXT: lui a0, 1 -; RV64I-NEXT: addiw a0, a0, 2040 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: ld ra, 0(a0) -; RV64I-NEXT: lui a0, 2 -; RV64I-NEXT: addiw a0, a0, -2048 +; RV64I-NEXT: addiw a0, a0, 16 ; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ld s0, 2016(sp) +; RV64I-NEXT: ld ra, 2024(sp) +; 
RV64I-NEXT: addi sp, sp, 2032 ; RV64I-NEXT: ret %1 = alloca i8, align 2048 call void @callee(i8* %1) @@ -519,18 +463,13 @@ define void @caller4096() nounwind { ; RV32I-LABEL: caller4096: ; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: sw ra, 2028(sp) +; RV32I-NEXT: sw s0, 2024(sp) +; RV32I-NEXT: addi s0, sp, 2032 ; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -2032 ; RV32I-NEXT: sub sp, sp, a0 -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, -4 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: sw ra, 0(a0) -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, -8 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: sw s0, 0(a0) -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: add s0, sp, a0 ; RV32I-NEXT: srli a0, sp, 12 ; RV32I-NEXT: slli sp, a0, 12 ; RV32I-NEXT: lui a0, 2 @@ -540,31 +479,22 @@ ; RV32I-NEXT: lui a0, 3 ; RV32I-NEXT: sub sp, s0, a0 ; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, -8 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: lw s0, 0(a0) -; RV32I-NEXT: lui a0, 3 -; RV32I-NEXT: addi a0, a0, -4 -; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: lw ra, 0(a0) -; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -2032 ; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: lw s0, 2024(sp) +; RV32I-NEXT: lw ra, 2028(sp) +; RV32I-NEXT: addi sp, sp, 2032 ; RV32I-NEXT: ret ; ; RV64I-LABEL: caller4096: ; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: sd ra, 2024(sp) +; RV64I-NEXT: sd s0, 2016(sp) +; RV64I-NEXT: addi s0, sp, 2032 ; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addiw a0, a0, -2032 ; RV64I-NEXT: sub sp, sp, a0 -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, -8 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: sd ra, 0(a0) -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, -16 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: sd s0, 0(a0) -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: add s0, sp, a0 ; RV64I-NEXT: srli a0, sp, 12 ; RV64I-NEXT: slli sp, a0, 12 ; RV64I-NEXT: lui a0, 2 @@ -574,15 +504,11 @@ ; RV64I-NEXT: lui a0, 3 ; RV64I-NEXT: sub sp, s0, a0 
; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, -16 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: ld s0, 0(a0) -; RV64I-NEXT: lui a0, 3 -; RV64I-NEXT: addiw a0, a0, -8 -; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: ld ra, 0(a0) -; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addiw a0, a0, -2032 ; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ld s0, 2016(sp) +; RV64I-NEXT: ld ra, 2024(sp) +; RV64I-NEXT: addi sp, sp, 2032 ; RV64I-NEXT: ret %1 = alloca i8, align 4096 call void @callee(i8* %1)