diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H #define LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { @@ -56,10 +57,19 @@ MutableArrayRef CSI, const TargetRegisterInfo *TRI) const override; - // Get the first stack adjustment amount for SplitSPAdjust. - // Return 0 if we don't want to to split the SP adjustment in prologue and - // epilogue. - uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const; + /// Returns whether the stack pointer (SP) should be adjusted in two + /// adjustments in the prologue and epilogue ("split"), or only adjusted once. + /// + /// Splitting the SP adjustment can result in better code size. + /// + /// The result will be `None` if the SP adjustment should not be split, or an + /// Optional containing the first adjustment amount if the adjustment should + /// be split. + /// + /// The first SP adjustment will never be more than the function's StackSize, + /// so that the second SP adjustment is monotonic. + Optional + getFirstSPAdjustmentAmount(const MachineFunction &MF) const; bool canUseAsPrologue(const MachineBasicBlock &MBB) const override; bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -376,11 +376,13 @@ MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ MF.getFunction(), "Stack pointer required, but has been reserved."}); - uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + // This will tell us whether we're adjusting the stack twice or just once. 
+ Optional FirstSPAdjustmentAmount = getFirstSPAdjustmentAmount(MF); + // Split the SP adjustment to reduce the offsets of callee saved spill. - if (FirstSPAdjustAmount) { - StackSize = FirstSPAdjustAmount; - RealStackSize = FirstSPAdjustAmount; + if (FirstSPAdjustmentAmount) { + StackSize = FirstSPAdjustmentAmount.getValue(); + RealStackSize = FirstSPAdjustmentAmount.getValue(); } // Allocate space on the stack if necessary. @@ -405,16 +407,19 @@ // Iterate over list of callee-saved registers and emit .cfi_offset // directives. for (const auto &Entry : CSI) { + Register Reg = Entry.getReg(); int FrameIdx = Entry.getFrameIdx(); + + // Offsets are calculated relative to the CFA, so do not need to change if + // the stack pointer is adjusted twice. int64_t Offset; - // Offsets for objects with fixed locations (IE: those saved by libcall) are - // simply calculated from the frame index. if (FrameIdx < 0) + // Offsets for objects with fixed locations (IE: those saved by libcall) + // are simply calculated from the frame index. Offset = FrameIdx * (int64_t) STI.getXLen() / 8; else - Offset = MFI.getObjectOffset(Entry.getFrameIdx()) - - RVFI->getLibCallStackSize(); - Register Reg = Entry.getReg(); + Offset = MFI.getObjectOffset(FrameIdx) - RVFI->getLibCallStackSize(); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, RI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -422,7 +427,8 @@ } // Generate new FP. - if (hasFP(MF)) { + bool UsesFramePointer = hasFP(MF); + if (UsesFramePointer) { if (STI.isRegisterReservedByUser(FPReg)) MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ MF.getFunction(), "Frame pointer required, but has been reserved."}); @@ -439,16 +445,21 @@ } // Emit the second SP adjustment after saving callee saved registers. 
- if (FirstSPAdjustAmount) { - uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + if (FirstSPAdjustmentAmount) { + int64_t SecondSPAdjustAmount = + MFI.getStackSize() - FirstSPAdjustmentAmount.getValue(); assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount, MachineInstr::FrameSetup); - // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0", - // don't emit an sp-based .cfi_def_cfa_offset - if (!hasFP(MF)) { + // If we are using a frame pointer (and thus emitted ".cfi_def_cfa fp, N", + // above), we don't need to update the cfa offset after doing the second + // stack pointer adjustment. + // + // This does not need to care about `getLibCallStackSize` because we know + // it will be zero if we're splitting the stack. + if (!UsesFramePointer) { // Emit ".cfi_def_cfa_offset StackSize" unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); @@ -457,7 +468,7 @@ } } - if (hasFP(MF)) { + if (UsesFramePointer) { // Realign Stack const RISCVRegisterInfo *RI = STI.getRegisterInfo(); if (RI->needsStackRealignment(MF)) { @@ -544,9 +555,10 @@ MachineInstr::FrameDestroy); } - uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); - if (FirstSPAdjustAmount) { - uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + Optional FirstSPAdjustmentAmount = getFirstSPAdjustmentAmount(MF); + if (FirstSPAdjustmentAmount) { + int64_t SecondSPAdjustAmount = + StackSize - FirstSPAdjustmentAmount.getValue(); assert(SecondSPAdjustAmount > 0 && "SecondSPAdjustAmount should be greater than zero"); @@ -554,8 +566,8 @@ MachineInstr::FrameDestroy); } - if (FirstSPAdjustAmount) - StackSize = FirstSPAdjustAmount; + if (FirstSPAdjustmentAmount) + StackSize = FirstSPAdjustmentAmount.getValue(); // Deallocate stack adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); @@ 
-581,7 +593,7 @@ int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + MFI.getOffsetAdjustment(); - uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + Optional FirstSPAdjustmentAmount = getFirstSPAdjustmentAmount(MF); if (CSI.size()) { MinCSFI = CSI[0].getFrameIdx(); @@ -591,8 +603,8 @@ if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = RISCV::X2; - if (FirstSPAdjustAmount) - Offset += FirstSPAdjustAmount; + if (FirstSPAdjustmentAmount) + Offset += FirstSPAdjustmentAmount.getValue(); else Offset += MFI.getStackSize(); } else if (RI->needsStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { @@ -720,18 +732,35 @@ return MBB.erase(MI); } -// We would like to split the SP adjustment to reduce prologue/epilogue -// as following instructions. In this way, the offset of the callee saved -// register could fit in a single store. -// add sp,sp,-2032 -// sw ra,2028(sp) -// sw s0,2024(sp) -// sw s1,2020(sp) -// sw s3,2012(sp) -// sw s4,2008(sp) -// add sp,sp,-64 -uint64_t -RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { +// Splitting the stack pointer (sp) adjustment may reduce the number or size of +// instructions in the prologue and epilogue, as shown in the following +// instructions. +// +// The reason to do this is twofold: so that the offset for the callee-saved +// register saves/restores fits into a single (potentially compressed) +// instruction; and to ensure the SP adjustment amounts fit into one or two +// `addi` instructions rather than needing to materialise those immediates with +// extra instructions. +// +// add sp,sp,-2032 +// sw ra,2028(sp) +// sw s0,2024(sp) +// sw s1,2020(sp) +// sw s3,2012(sp) +// sw s4,2008(sp) +// add sp,sp,-64 +// +// If this function returns `None`, then the sp adjustment should be done in a +// single step. 
+// +// If this function returns an Optional containing a value, then the value is +// the first adjustment for the stack pointer (and the second can be calculated +// by taking the difference between this and the function's StackSize). +// +// The returned value should always be between 0 and the function's StackSize - +// the intention being that both sp adjustments are monotonic. +Optional RISCVFrameLowering::getFirstSPAdjustmentAmount( + const MachineFunction &MF) const { const auto *RVFI = MF.getInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const std::vector &CSI = MFI.getCalleeSavedInfo(); @@ -740,21 +769,62 @@ // Disable SplitSPAdjust if save-restore libcall used. The callee saved // registers will be pushed by the save-restore libcalls, so we don't have to // split the SP adjustment in this case. - if (RVFI->getLibCallStackSize()) - return 0; - - // Return the FirstSPAdjustAmount if the StackSize can not fit in signed - // 12-bit and there exists a callee saved register need to be pushed. - if (!isInt<12>(StackSize) && (CSI.size() > 0)) { - // FirstSPAdjustAmount is choosed as (2048 - StackAlign) - // because 2048 will cause sp = sp + 2048 in epilogue split into - // multi-instructions. The offset smaller than 2048 can fit in signle - // load/store instruction and we have to stick with the stack alignment. - // 2048 is 16-byte alignment. The stack alignment for RV32 and RV64 is 16, - // for RV32E is 4. So (2048 - StackAlign) will satisfy the stack alignment. - return 2048 - getStackAlign().value(); + if (RVFI->getLibCallStackSize() > 0) + return None; + + // Splitting the stack pointer adjustment is most useful for when there are + // callee-saved registers, allowing them to be saved and restored with minimal + // instructions. If we don't have any CSRs, then we don't need to split the + // stack pointer adjustment. 
+ if (CSI.size() == 0) + return None; + + const RISCVSubtarget &STI = MF.getSubtarget(); + bool OptSize = MF.getFunction().hasOptSize(); + size_t Saves = CSI.size(); + + // We want to split if the stack size is over a certain threshold. This + // threshold is set by which instructions are available, as we'd prefer to + // use the smallest instructions available. When we have compressed + // instructions we want to balance the benefit of the code size improvement + // against the cost of the additional stack adjustment instruction. + uint32_t OffsetAddressableLimit; + if (STI.hasStdExtC() && ((OptSize && Saves > 1) || (!OptSize && Saves > 4))) { + // On RV*C, we want to use c.l{w,d}sp and c.s{w,d}sp for saving and + // restoring callee-saved registers. + // + // On RV32C, the offset in these (the w variants, as the registers are + // word-sized) have an 8-bit limit. + // + // On RV64C, the offset in these (the d variants, as the registers are + // double-sized) have a 9-bit limit, so we could use 512, but we also + // want to use c.addi16sp to adjust the stack pointer in the prolog and + // the epilog, which has a limit of (-512,496), so we use 496. + OffsetAddressableLimit = STI.is64Bit() ? 496 : 256; + } else { + // If we don't have compressed instructions, we want to use the offset in + // l{d,w} or s{d,w}, which has a 12-bit limit, so 2048. + // + // However, we also want to ensure that we can do both "first" stack + // adjustments in one single instruction, preferably `addi`. In the + // prolog, this will be `addi sp, sp, -2048` which will fit a limit of + // 2048, but undoing this in the epilog (`addi sp, sp, 2048`) does not fit + // into a single instruction. + // + // So, in the end we need to choose a value less than 2048, to fit into + // the limit. We would most prefer the offsets remained as aligned as the + // stack is, so we choose 2048 - StackAlign. 
+ OffsetAddressableLimit = 2048 - getStackAlign().value(); } - return 0; + + // There's only a point in splitting if the stack size is over the threshold + // we found. + if (StackSize > OffsetAddressableLimit) + return OffsetAddressableLimit; + + // Otherwise we'll have no problem addressing these offsets without splitting + // the stack pointer adjustment. + return None; } bool RISCVFrameLowering::spillCalleeSavedRegisters( diff --git a/llvm/test/CodeGen/RISCV/split-sp-adjust.ll b/llvm/test/CodeGen/RISCV/split-sp-adjust.ll --- a/llvm/test/CodeGen/RISCV/split-sp-adjust.ll +++ b/llvm/test/CodeGen/RISCV/split-sp-adjust.ll @@ -1,45 +1,1055 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -mattr=+c -verify-machineinstrs -riscv-no-aliases < %s \ +; RUN: | FileCheck %s -check-prefix=RV32IC +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -mattr=+c -verify-machineinstrs -riscv-no-aliases < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IC -; The stack size is 2048 and the SP adjustment will be split. -define i32 @SplitSP() nounwind { -; RV32I-LABEL: SplitSP: +; These tests check split stack pointer adjustment. +; +; Instead of adjusting the stack pointer once, if the stack is larger than a +; threshold, we adjust it twice: once before saving the CSRs, and once after. +; The intention here is to ensure that the offsets when saving the CSRs fit into +; the offset field in the CSR load/store instructions, so the threshold depends +; on which instructions are available. +; +; We show which instructions are compressed, because the important thing is that +; the thresholds allow the stack offsets to fit into c.{l,s}{d,w}sp if +; compressed instructions are available, and `{l,s}{d,w}` if not. 
It is also +; best if the stack adjustments use compressed instructions when available, or +; at most a single `addi`. +; +; These tests include unwind information to ensure it remains valid when +; splitting the SP adjustment. When splitting the SP adjustment, there will be +; two .cfi_def_cfa_offset directives, one for each split. The CSR info will come +; between them. This is correct, as the `.cfi_offset <reg>, <offset>` offsets +; are defined relative to the CFA, not relative to the stack pointer, so we +; don't need to restate the CFA offsets after the second SP adjustment. + +declare i32 @use_pointer(i8*) + +; stack size is 128 and stack pointer adjustment will not be split. +define i32 @stack_never_split() { +; RV32I-LABEL: stack_never_split: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -128 +; RV32I-NEXT: .cfi_def_cfa_offset 128 +; RV32I-NEXT: sw ra, 124(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a0, sp, 4 +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw ra, 124(sp) +; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_never_split: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -128 +; RV32IC-NEXT: .cfi_def_cfa_offset 128 +; RV32IC-NEXT: c.swsp ra, 124(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: c.addi4spn a0, sp, 4 +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: c.lwsp ra, 124(sp) +; RV32IC-NEXT: c.addi16sp sp, 128 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_never_split: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -128 +; RV64I-NEXT: .cfi_def_cfa_offset 128 +; RV64I-NEXT: sd ra, 120(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: ld ra, 120(sp) +; RV64I-NEXT: addi sp, sp, 128 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_never_split: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -128 +; RV64IC-NEXT: .cfi_def_cfa_offset 128 +; RV64IC-NEXT: c.sdsp ra, 120(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; 
RV64IC-NEXT: c.mv a0, sp +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.ldsp ra, 120(sp) +; RV64IC-NEXT: c.addi16sp sp, 128 +; RV64IC-NEXT: c.jr ra +entry: + %xx = alloca [120 x i8], align 1 + %0 = getelementptr inbounds [120 x i8], [120 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +; stack size is 256 (RV32) or 272 (RV64) and stack pointer adjustment will not +; be split. +define i32 @stack_never_split_2() { +; RV32I-LABEL: stack_never_split_2: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: addi sp, sp, -2032 -; RV32I-NEXT: sw ra, 2028(sp) -; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: addi sp, sp, -256 +; RV32I-NEXT: .cfi_def_cfa_offset 256 +; RV32I-NEXT: sw ra, 252(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw ra, 252(sp) +; RV32I-NEXT: addi sp, sp, 256 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_never_split_2: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -256 +; RV32IC-NEXT: .cfi_def_cfa_offset 256 +; RV32IC-NEXT: c.swsp ra, 252(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: c.mv a0, sp +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: c.lwsp ra, 252(sp) +; RV32IC-NEXT: c.addi16sp sp, 256 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_never_split_2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -272 +; RV64I-NEXT: .cfi_def_cfa_offset 272 +; RV64I-NEXT: sd ra, 264(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi a0, sp, 12 +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: ld ra, 264(sp) +; RV64I-NEXT: addi sp, sp, 272 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_never_split_2: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -272 +; RV64IC-NEXT: .cfi_def_cfa_offset 272 +; RV64IC-NEXT: c.sdsp ra, 264(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: c.addi4spn a0, sp, 12 +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.ldsp ra, 264(sp) +; RV64IC-NEXT: c.addi16sp sp, 272 +; RV64IC-NEXT: c.jr 
ra +entry: + %xx = alloca [252 x i8], align 1 + %0 = getelementptr inbounds [252 x i8], [252 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +; stack size is 272 (RV32) or 288 (RV64) and stack pointer adjustment will +; be split on RV32*C if we have sufficient instruction compression gains. + +define i32 @stack_split_rv32_c_optspeed_save4() { +; RV32I-LABEL: stack_split_rv32_c_optspeed_save4: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -272 +; RV32I-NEXT: .cfi_def_cfa_offset 272 +; RV32I-NEXT: sw ra, 268(sp) +; RV32I-NEXT: sw s0, 264(sp) +; RV32I-NEXT: sw s1, 260(sp) +; RV32I-NEXT: sw s2, 256(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw s2, 256(sp) +; RV32I-NEXT: lw s1, 260(sp) +; RV32I-NEXT: lw s0, 264(sp) +; RV32I-NEXT: lw ra, 268(sp) +; RV32I-NEXT: addi sp, sp, 272 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_split_rv32_c_optspeed_save4: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -272 +; RV32IC-NEXT: .cfi_def_cfa_offset 272 +; RV32IC-NEXT: sw ra, 268(sp) +; RV32IC-NEXT: sw s0, 264(sp) +; RV32IC-NEXT: sw s1, 260(sp) +; RV32IC-NEXT: sw s2, 256(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: .cfi_offset s0, -8 +; RV32IC-NEXT: .cfi_offset s1, -12 +; RV32IC-NEXT: .cfi_offset s2, -16 +; RV32IC-NEXT: #APP +; RV32IC-NEXT: #NO_APP +; RV32IC-NEXT: c.mv a0, sp +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: lw s2, 256(sp) +; RV32IC-NEXT: lw s1, 260(sp) +; RV32IC-NEXT: lw s0, 264(sp) +; RV32IC-NEXT: lw ra, 268(sp) +; RV32IC-NEXT: c.addi16sp sp, 272 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_rv32_c_optspeed_save4: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -288 +; RV64I-NEXT: .cfi_def_cfa_offset 288 +; RV64I-NEXT: sd ra, 280(sp) +; RV64I-NEXT: sd s0, 272(sp) 
+; RV64I-NEXT: sd s1, 264(sp) +; RV64I-NEXT: sd s2, 256(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: ld s2, 256(sp) +; RV64I-NEXT: ld s1, 264(sp) +; RV64I-NEXT: ld s0, 272(sp) +; RV64I-NEXT: ld ra, 280(sp) +; RV64I-NEXT: addi sp, sp, 288 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_rv32_c_optspeed_save4: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -288 +; RV64IC-NEXT: .cfi_def_cfa_offset 288 +; RV64IC-NEXT: c.sdsp ra, 280(sp) +; RV64IC-NEXT: c.sdsp s0, 272(sp) +; RV64IC-NEXT: c.sdsp s1, 264(sp) +; RV64IC-NEXT: c.sdsp s2, 256(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: .cfi_offset s0, -16 +; RV64IC-NEXT: .cfi_offset s1, -24 +; RV64IC-NEXT: .cfi_offset s2, -32 +; RV64IC-NEXT: #APP +; RV64IC-NEXT: #NO_APP +; RV64IC-NEXT: c.mv a0, sp +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.ldsp s2, 256(sp) +; RV64IC-NEXT: c.ldsp s1, 264(sp) +; RV64IC-NEXT: c.ldsp s0, 272(sp) +; RV64IC-NEXT: c.ldsp ra, 280(sp) +; RV64IC-NEXT: c.addi16sp sp, 288 +; RV64IC-NEXT: c.jr ra +entry: + %xx = alloca [256 x i8], align 1 + call void asm sideeffect "", "~{x8},~{x9},~{x18}"() + %0 = getelementptr inbounds [256 x i8], [256 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +define i32 @stack_split_rv32_c_optspeed_save5() { +; RV32I-LABEL: stack_split_rv32_c_optspeed_save5: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -272 +; RV32I-NEXT: .cfi_def_cfa_offset 272 +; RV32I-NEXT: sw ra, 268(sp) +; RV32I-NEXT: sw s0, 264(sp) +; RV32I-NEXT: sw s1, 260(sp) +; RV32I-NEXT: sw s2, 256(sp) +; RV32I-NEXT: sw s3, 252(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: 
#APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: addi a0, sp, 12 +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw s3, 252(sp) +; RV32I-NEXT: lw s2, 256(sp) +; RV32I-NEXT: lw s1, 260(sp) +; RV32I-NEXT: lw s0, 264(sp) +; RV32I-NEXT: lw ra, 268(sp) +; RV32I-NEXT: addi sp, sp, 272 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_split_rv32_c_optspeed_save5: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -256 +; RV32IC-NEXT: .cfi_def_cfa_offset 256 +; RV32IC-NEXT: c.swsp ra, 252(sp) +; RV32IC-NEXT: c.swsp s0, 248(sp) +; RV32IC-NEXT: c.swsp s1, 244(sp) +; RV32IC-NEXT: c.swsp s2, 240(sp) +; RV32IC-NEXT: c.swsp s3, 236(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: .cfi_offset s0, -8 +; RV32IC-NEXT: .cfi_offset s1, -12 +; RV32IC-NEXT: .cfi_offset s2, -16 +; RV32IC-NEXT: .cfi_offset s3, -20 +; RV32IC-NEXT: c.addi sp, -16 +; RV32IC-NEXT: .cfi_def_cfa_offset 272 +; RV32IC-NEXT: #APP +; RV32IC-NEXT: #NO_APP +; RV32IC-NEXT: c.addi4spn a0, sp, 12 +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: c.addi sp, 16 +; RV32IC-NEXT: c.lwsp s3, 236(sp) +; RV32IC-NEXT: c.lwsp s2, 240(sp) +; RV32IC-NEXT: c.lwsp s1, 244(sp) +; RV32IC-NEXT: c.lwsp s0, 248(sp) +; RV32IC-NEXT: c.lwsp ra, 252(sp) +; RV32IC-NEXT: c.addi16sp sp, 256 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_rv32_c_optspeed_save5: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -288 +; RV64I-NEXT: .cfi_def_cfa_offset 288 +; RV64I-NEXT: sd ra, 280(sp) +; RV64I-NEXT: sd s0, 272(sp) +; RV64I-NEXT: sd s1, 264(sp) +; RV64I-NEXT: sd s2, 256(sp) +; RV64I-NEXT: sd s3, 248(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: ld s3, 248(sp) +; RV64I-NEXT: ld s2, 256(sp) +; RV64I-NEXT: ld s1, 264(sp) +; RV64I-NEXT: ld s0, 272(sp) +; RV64I-NEXT: ld ra, 280(sp) +; 
RV64I-NEXT: addi sp, sp, 288 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_rv32_c_optspeed_save5: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -288 +; RV64IC-NEXT: .cfi_def_cfa_offset 288 +; RV64IC-NEXT: c.sdsp ra, 280(sp) +; RV64IC-NEXT: c.sdsp s0, 272(sp) +; RV64IC-NEXT: c.sdsp s1, 264(sp) +; RV64IC-NEXT: c.sdsp s2, 256(sp) +; RV64IC-NEXT: c.sdsp s3, 248(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: .cfi_offset s0, -16 +; RV64IC-NEXT: .cfi_offset s1, -24 +; RV64IC-NEXT: .cfi_offset s2, -32 +; RV64IC-NEXT: .cfi_offset s3, -40 +; RV64IC-NEXT: #APP +; RV64IC-NEXT: #NO_APP +; RV64IC-NEXT: c.addi4spn a0, sp, 8 +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.ldsp s3, 248(sp) +; RV64IC-NEXT: c.ldsp s2, 256(sp) +; RV64IC-NEXT: c.ldsp s1, 264(sp) +; RV64IC-NEXT: c.ldsp s0, 272(sp) +; RV64IC-NEXT: c.ldsp ra, 280(sp) +; RV64IC-NEXT: c.addi16sp sp, 288 +; RV64IC-NEXT: c.jr ra +entry: + %xx = alloca [240 x i8], align 1 + call void asm sideeffect "", "~{x8},~{x9},~{x18},~{x19}"() + %0 = getelementptr inbounds [240 x i8], [240 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +define i32 @stack_split_rv32_c_optsize_save1() optsize { +; RV32I-LABEL: stack_split_rv32_c_optsize_save1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -272 +; RV32I-NEXT: .cfi_def_cfa_offset 272 +; RV32I-NEXT: sw ra, 268(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw ra, 268(sp) +; RV32I-NEXT: addi sp, sp, 272 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_split_rv32_c_optsize_save1: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -272 +; RV32IC-NEXT: .cfi_def_cfa_offset 272 +; RV32IC-NEXT: sw ra, 268(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: c.mv a0, sp +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: lw ra, 268(sp) +; RV32IC-NEXT: c.addi16sp sp, 272 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_rv32_c_optsize_save1: +; 
RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -288 +; RV64I-NEXT: .cfi_def_cfa_offset 288 +; RV64I-NEXT: sd ra, 280(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi a0, sp, 12 +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: ld ra, 280(sp) +; RV64I-NEXT: addi sp, sp, 288 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_rv32_c_optsize_save1: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -288 +; RV64IC-NEXT: .cfi_def_cfa_offset 288 +; RV64IC-NEXT: c.sdsp ra, 280(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: c.addi4spn a0, sp, 12 +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.ldsp ra, 280(sp) +; RV64IC-NEXT: c.addi16sp sp, 288 +; RV64IC-NEXT: c.jr ra +entry: + %xx = alloca [268 x i8], align 1 + %0 = getelementptr inbounds [268 x i8], [268 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +define i32 @stack_split_rv32_c_optsize_save2() optsize { +; RV32I-LABEL: stack_split_rv32_c_optsize_save2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -272 +; RV32I-NEXT: .cfi_def_cfa_offset 272 +; RV32I-NEXT: sw ra, 268(sp) +; RV32I-NEXT: sw s0, 264(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw s0, 264(sp) +; RV32I-NEXT: lw ra, 268(sp) +; RV32I-NEXT: addi sp, sp, 272 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_split_rv32_c_optsize_save2: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -256 +; RV32IC-NEXT: .cfi_def_cfa_offset 256 +; RV32IC-NEXT: c.swsp ra, 252(sp) +; RV32IC-NEXT: c.swsp s0, 248(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: .cfi_offset s0, -8 +; RV32IC-NEXT: c.addi sp, -16 +; RV32IC-NEXT: .cfi_def_cfa_offset 272 +; RV32IC-NEXT: #APP +; RV32IC-NEXT: #NO_APP +; RV32IC-NEXT: c.mv a0, sp +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: c.addi sp, 16 +; RV32IC-NEXT: c.lwsp s0, 248(sp) +; RV32IC-NEXT: c.lwsp ra, 
252(sp) +; RV32IC-NEXT: c.addi16sp sp, 256 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_rv32_c_optsize_save2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -288 +; RV64I-NEXT: .cfi_def_cfa_offset 288 +; RV64I-NEXT: sd ra, 280(sp) +; RV64I-NEXT: sd s0, 272(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: ld s0, 272(sp) +; RV64I-NEXT: ld ra, 280(sp) +; RV64I-NEXT: addi sp, sp, 288 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_rv32_c_optsize_save2: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -288 +; RV64IC-NEXT: .cfi_def_cfa_offset 288 +; RV64IC-NEXT: c.sdsp ra, 280(sp) +; RV64IC-NEXT: c.sdsp s0, 272(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: .cfi_offset s0, -16 +; RV64IC-NEXT: #APP +; RV64IC-NEXT: #NO_APP +; RV64IC-NEXT: c.addi4spn a0, sp, 8 +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.ldsp s0, 272(sp) +; RV64IC-NEXT: c.ldsp ra, 280(sp) +; RV64IC-NEXT: c.addi16sp sp, 288 +; RV64IC-NEXT: c.jr ra +entry: + %xx = alloca [264 x i8], align 1 + call void asm sideeffect "", "~{x8}"() + %0 = getelementptr inbounds [264 x i8], [264 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +; stack size is 496 (RV32) or 512 (RV64) and stack pointer adjustment will be +; split on RV32C and RV64C if we have sufficient instruction compression gains. 
+ +define i32 @stack_split_rv64_c_optspeed_save4() { +; RV32I-LABEL: stack_split_rv64_c_optspeed_save4: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -496 +; RV32I-NEXT: .cfi_def_cfa_offset 496 +; RV32I-NEXT: sw ra, 492(sp) +; RV32I-NEXT: sw s0, 488(sp) +; RV32I-NEXT: sw s1, 484(sp) +; RV32I-NEXT: sw s2, 480(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: addi a0, sp, 4 +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw s2, 480(sp) +; RV32I-NEXT: lw s1, 484(sp) +; RV32I-NEXT: lw s0, 488(sp) +; RV32I-NEXT: lw ra, 492(sp) +; RV32I-NEXT: addi sp, sp, 496 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_split_rv64_c_optspeed_save4: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -496 +; RV32IC-NEXT: .cfi_def_cfa_offset 496 +; RV32IC-NEXT: sw ra, 492(sp) +; RV32IC-NEXT: sw s0, 488(sp) +; RV32IC-NEXT: sw s1, 484(sp) +; RV32IC-NEXT: sw s2, 480(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: .cfi_offset s0, -8 +; RV32IC-NEXT: .cfi_offset s1, -12 +; RV32IC-NEXT: .cfi_offset s2, -16 +; RV32IC-NEXT: #APP +; RV32IC-NEXT: #NO_APP +; RV32IC-NEXT: c.addi4spn a0, sp, 4 +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: lw s2, 480(sp) +; RV32IC-NEXT: lw s1, 484(sp) +; RV32IC-NEXT: lw s0, 488(sp) +; RV32IC-NEXT: lw ra, 492(sp) +; RV32IC-NEXT: c.addi16sp sp, 496 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_rv64_c_optspeed_save4: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -512 +; RV64I-NEXT: .cfi_def_cfa_offset 512 +; RV64I-NEXT: sd ra, 504(sp) +; RV64I-NEXT: sd s0, 496(sp) +; RV64I-NEXT: sd s1, 488(sp) +; RV64I-NEXT: sd s2, 480(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: addi a0, sp, 4 +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: ld 
s2, 480(sp) +; RV64I-NEXT: ld s1, 488(sp) +; RV64I-NEXT: ld s0, 496(sp) +; RV64I-NEXT: ld ra, 504(sp) +; RV64I-NEXT: addi sp, sp, 512 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_rv64_c_optspeed_save4: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -512 +; RV64IC-NEXT: .cfi_def_cfa_offset 512 +; RV64IC-NEXT: c.sdsp ra, 504(sp) +; RV64IC-NEXT: c.sdsp s0, 496(sp) +; RV64IC-NEXT: c.sdsp s1, 488(sp) +; RV64IC-NEXT: c.sdsp s2, 480(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: .cfi_offset s0, -16 +; RV64IC-NEXT: .cfi_offset s1, -24 +; RV64IC-NEXT: .cfi_offset s2, -32 +; RV64IC-NEXT: #APP +; RV64IC-NEXT: #NO_APP +; RV64IC-NEXT: c.addi4spn a0, sp, 4 +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.ldsp s2, 480(sp) +; RV64IC-NEXT: c.ldsp s1, 488(sp) +; RV64IC-NEXT: c.ldsp s0, 496(sp) +; RV64IC-NEXT: c.ldsp ra, 504(sp) +; RV64IC-NEXT: addi sp, sp, 512 +; RV64IC-NEXT: c.jr ra +entry: + %xx = alloca [476 x i8], align 1 + call void asm sideeffect "", "~{x8},~{x9},~{x18}"() + %0 = getelementptr inbounds [476 x i8], [476 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +define i32 @stack_split_rv64_c_optspeed_save5() { +; RV32I-LABEL: stack_split_rv64_c_optspeed_save5: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -496 +; RV32I-NEXT: .cfi_def_cfa_offset 496 +; RV32I-NEXT: sw ra, 492(sp) +; RV32I-NEXT: sw s0, 488(sp) +; RV32I-NEXT: sw s1, 484(sp) +; RV32I-NEXT: sw s2, 480(sp) +; RV32I-NEXT: sw s3, 476(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: addi a0, sp, 4 +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw s3, 476(sp) +; RV32I-NEXT: lw s2, 480(sp) +; RV32I-NEXT: lw s1, 484(sp) +; RV32I-NEXT: lw s0, 488(sp) +; RV32I-NEXT: lw ra, 492(sp) +; RV32I-NEXT: addi sp, sp, 496 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: 
stack_split_rv64_c_optspeed_save5: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -256 +; RV32IC-NEXT: .cfi_def_cfa_offset 256 +; RV32IC-NEXT: c.swsp ra, 252(sp) +; RV32IC-NEXT: c.swsp s0, 248(sp) +; RV32IC-NEXT: c.swsp s1, 244(sp) +; RV32IC-NEXT: c.swsp s2, 240(sp) +; RV32IC-NEXT: c.swsp s3, 236(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: .cfi_offset s0, -8 +; RV32IC-NEXT: .cfi_offset s1, -12 +; RV32IC-NEXT: .cfi_offset s2, -16 +; RV32IC-NEXT: .cfi_offset s3, -20 +; RV32IC-NEXT: c.addi16sp sp, -240 +; RV32IC-NEXT: .cfi_def_cfa_offset 496 +; RV32IC-NEXT: #APP +; RV32IC-NEXT: #NO_APP +; RV32IC-NEXT: c.addi4spn a0, sp, 4 +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: c.addi16sp sp, 240 +; RV32IC-NEXT: c.lwsp s3, 236(sp) +; RV32IC-NEXT: c.lwsp s2, 240(sp) +; RV32IC-NEXT: c.lwsp s1, 244(sp) +; RV32IC-NEXT: c.lwsp s0, 248(sp) +; RV32IC-NEXT: c.lwsp ra, 252(sp) +; RV32IC-NEXT: c.addi16sp sp, 256 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_rv64_c_optspeed_save5: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -512 +; RV64I-NEXT: .cfi_def_cfa_offset 512 +; RV64I-NEXT: sd ra, 504(sp) +; RV64I-NEXT: sd s0, 496(sp) +; RV64I-NEXT: sd s1, 488(sp) +; RV64I-NEXT: sd s2, 480(sp) +; RV64I-NEXT: sd s3, 472(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: ld s3, 472(sp) +; RV64I-NEXT: ld s2, 480(sp) +; RV64I-NEXT: ld s1, 488(sp) +; RV64I-NEXT: ld s0, 496(sp) +; RV64I-NEXT: ld ra, 504(sp) +; RV64I-NEXT: addi sp, sp, 512 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_rv64_c_optspeed_save5: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -496 +; RV64IC-NEXT: .cfi_def_cfa_offset 496 +; RV64IC-NEXT: c.sdsp ra, 488(sp) +; RV64IC-NEXT: c.sdsp s0, 480(sp) +; RV64IC-NEXT: c.sdsp s1, 472(sp) +; 
RV64IC-NEXT: c.sdsp s2, 464(sp) +; RV64IC-NEXT: c.sdsp s3, 456(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: .cfi_offset s0, -16 +; RV64IC-NEXT: .cfi_offset s1, -24 +; RV64IC-NEXT: .cfi_offset s2, -32 +; RV64IC-NEXT: .cfi_offset s3, -40 +; RV64IC-NEXT: c.addi sp, -16 +; RV64IC-NEXT: .cfi_def_cfa_offset 512 +; RV64IC-NEXT: #APP +; RV64IC-NEXT: #NO_APP +; RV64IC-NEXT: c.mv a0, sp +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.addi sp, 16 +; RV64IC-NEXT: c.ldsp s3, 456(sp) +; RV64IC-NEXT: c.ldsp s2, 464(sp) +; RV64IC-NEXT: c.ldsp s1, 472(sp) +; RV64IC-NEXT: c.ldsp s0, 480(sp) +; RV64IC-NEXT: c.ldsp ra, 488(sp) +; RV64IC-NEXT: c.addi16sp sp, 496 +; RV64IC-NEXT: c.jr ra +entry: + %xx = alloca [472 x i8], align 1 + call void asm sideeffect "", "~{x8},~{x9},~{x18},~{x19}"() + %0 = getelementptr inbounds [472 x i8], [472 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +define i32 @stack_split_rv64_c_optsize_save1() optsize { +; RV32I-LABEL: stack_split_rv64_c_optsize_save1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -496 +; RV32I-NEXT: .cfi_def_cfa_offset 496 +; RV32I-NEXT: sw ra, 492(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw ra, 492(sp) +; RV32I-NEXT: addi sp, sp, 496 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_split_rv64_c_optsize_save1: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -496 +; RV32IC-NEXT: .cfi_def_cfa_offset 496 +; RV32IC-NEXT: sw ra, 492(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: c.mv a0, sp +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: lw ra, 492(sp) +; RV32IC-NEXT: c.addi16sp sp, 496 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_rv64_c_optsize_save1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -512 +; RV64I-NEXT: .cfi_def_cfa_offset 512 +; RV64I-NEXT: sd ra, 504(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi a0, sp, 12 +; RV64I-NEXT: call use_pointer 
+; RV64I-NEXT: ld ra, 504(sp) +; RV64I-NEXT: addi sp, sp, 512 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_rv64_c_optsize_save1: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -512 +; RV64IC-NEXT: .cfi_def_cfa_offset 512 +; RV64IC-NEXT: c.sdsp ra, 504(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: c.addi4spn a0, sp, 12 +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.ldsp ra, 504(sp) +; RV64IC-NEXT: addi sp, sp, 512 +; RV64IC-NEXT: c.jr ra +entry: + %xx = alloca [492 x i8], align 1 + %0 = getelementptr inbounds [492 x i8], [492 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +define i32 @stack_split_rv64_c_optsize_save2() optsize { +; RV32I-LABEL: stack_split_rv64_c_optsize_save2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -496 +; RV32I-NEXT: .cfi_def_cfa_offset 496 +; RV32I-NEXT: sw ra, 492(sp) +; RV32I-NEXT: sw s0, 488(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call use_pointer +; RV32I-NEXT: lw s0, 488(sp) +; RV32I-NEXT: lw ra, 492(sp) +; RV32I-NEXT: addi sp, sp, 496 +; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_split_rv64_c_optsize_save2: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: c.addi16sp sp, -256 +; RV32IC-NEXT: .cfi_def_cfa_offset 256 +; RV32IC-NEXT: c.swsp ra, 252(sp) +; RV32IC-NEXT: c.swsp s0, 248(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: .cfi_offset s0, -8 +; RV32IC-NEXT: c.addi16sp sp, -240 +; RV32IC-NEXT: .cfi_def_cfa_offset 496 +; RV32IC-NEXT: #APP +; RV32IC-NEXT: #NO_APP +; RV32IC-NEXT: c.mv a0, sp +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: c.addi16sp sp, 240 +; RV32IC-NEXT: c.lwsp s0, 248(sp) +; RV32IC-NEXT: c.lwsp ra, 252(sp) +; RV32IC-NEXT: c.addi16sp sp, 256 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_rv64_c_optsize_save2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -512 +; RV64I-NEXT: .cfi_def_cfa_offset 
512 +; RV64I-NEXT: sd ra, 504(sp) +; RV64I-NEXT: sd s0, 496(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: ld s0, 496(sp) +; RV64I-NEXT: ld ra, 504(sp) +; RV64I-NEXT: addi sp, sp, 512 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_rv64_c_optsize_save2: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.addi16sp sp, -496 +; RV64IC-NEXT: .cfi_def_cfa_offset 496 +; RV64IC-NEXT: c.sdsp ra, 488(sp) +; RV64IC-NEXT: c.sdsp s0, 480(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: .cfi_offset s0, -16 +; RV64IC-NEXT: c.addi sp, -16 +; RV64IC-NEXT: .cfi_def_cfa_offset 512 +; RV64IC-NEXT: #APP +; RV64IC-NEXT: #NO_APP +; RV64IC-NEXT: c.addi4spn a0, sp, 8 +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.addi sp, 16 +; RV64IC-NEXT: c.ldsp s0, 480(sp) +; RV64IC-NEXT: c.ldsp ra, 488(sp) +; RV64IC-NEXT: c.addi16sp sp, 496 +; RV64IC-NEXT: c.jr ra +entry: + %xx = alloca [488 x i8], align 1 + call void asm sideeffect "", "~{x8}"() + %0 = getelementptr inbounds [488 x i8], [488 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) + ret i32 %call +} + +; The stack size is 2048 and the SP adjustment will always be split. 
+define i32 @stack_split_always() { +; RV32I-LABEL: stack_split_always: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -2048 +; RV32I-NEXT: .cfi_def_cfa_offset 2048 +; RV32I-NEXT: sw ra, 2044(sp) +; RV32I-NEXT: .cfi_offset ra, -4 ; RV32I-NEXT: addi a0, sp, 16 -; RV32I-NEXT: call foo +; RV32I-NEXT: call use_pointer ; RV32I-NEXT: mv a0, zero -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: lw ra, 2028(sp) -; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: lw ra, 2044(sp) +; RV32I-NEXT: lui a1, 1 +; RV32I-NEXT: addi a1, a1, -2048 +; RV32I-NEXT: add sp, sp, a1 ; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_split_always: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: addi sp, sp, -2048 +; RV32IC-NEXT: .cfi_def_cfa_offset 2048 +; RV32IC-NEXT: sw ra, 2044(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: c.addi4spn a0, sp, 16 +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: c.li a0, 0 +; RV32IC-NEXT: lw ra, 2044(sp) +; RV32IC-NEXT: c.lui a1, 1 +; RV32IC-NEXT: addi a1, a1, -2048 +; RV32IC-NEXT: c.add sp, a1 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_always: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -2048 +; RV64I-NEXT: .cfi_def_cfa_offset 2048 +; RV64I-NEXT: sd ra, 2040(sp) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi a0, sp, 12 +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: mv a0, zero +; RV64I-NEXT: ld ra, 2040(sp) +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, -2048 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_always: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: addi sp, sp, -2048 +; RV64IC-NEXT: .cfi_def_cfa_offset 2048 +; RV64IC-NEXT: sd ra, 2040(sp) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: c.addi4spn a0, sp, 12 +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.li a0, 0 +; RV64IC-NEXT: ld ra, 2040(sp) +; RV64IC-NEXT: c.lui a1, 1 +; RV64IC-NEXT: addiw a1, a1, -2048 +; RV64IC-NEXT: c.add sp, a1 +; RV64IC-NEXT: c.jr ra entry: %xx = alloca [2028 x i8], align 1 %0 = getelementptr 
inbounds [2028 x i8], [2028 x i8]* %xx, i32 0, i32 0 - %call = call i32 @foo(i8* nonnull %0) + %call = call i32 @use_pointer(i8* nonnull %0) ret i32 0 } -; The stack size is 2032 and the SP adjustment will not be split. -define i32 @NoSplitSP() nounwind { -; RV32I-LABEL: NoSplitSP: +; The stack size is 2048 and the SP adjustment will always be split. This test +; ensures that the CFI info remains correct if the stack pointer adjustment is +; split and a frame pointer is needed. +define i32 @stack_split_always_with_fp() "frame-pointer"="all" { +; RV32I-LABEL: stack_split_always_with_fp: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: addi sp, sp, -2032 -; RV32I-NEXT: sw ra, 2028(sp) -; RV32I-NEXT: addi a0, sp, 4 -; RV32I-NEXT: call foo +; RV32I-NEXT: addi sp, sp, -2048 +; RV32I-NEXT: .cfi_def_cfa_offset 2048 +; RV32I-NEXT: sw ra, 2044(sp) +; RV32I-NEXT: sw s0, 2040(sp) +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: add s0, sp, a0 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, s0, -2040 +; RV32I-NEXT: call use_pointer ; RV32I-NEXT: mv a0, zero -; RV32I-NEXT: lw ra, 2028(sp) -; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: lw s0, 2040(sp) +; RV32I-NEXT: lw ra, 2044(sp) +; RV32I-NEXT: lui a1, 1 +; RV32I-NEXT: addi a1, a1, -2048 +; RV32I-NEXT: add sp, sp, a1 ; RV32I-NEXT: ret +; +; RV32IC-LABEL: stack_split_always_with_fp: +; RV32IC: # %bb.0: # %entry +; RV32IC-NEXT: addi sp, sp, -2048 +; RV32IC-NEXT: .cfi_def_cfa_offset 2048 +; RV32IC-NEXT: sw ra, 2044(sp) +; RV32IC-NEXT: sw s0, 2040(sp) +; RV32IC-NEXT: .cfi_offset ra, -4 +; RV32IC-NEXT: .cfi_offset s0, -8 +; RV32IC-NEXT: c.lui a0, 1 +; RV32IC-NEXT: addi a0, a0, -2048 +; RV32IC-NEXT: add s0, sp, a0 +; RV32IC-NEXT: .cfi_def_cfa s0, 0 +; RV32IC-NEXT: addi a0, s0, -2040 +; RV32IC-NEXT: call use_pointer +; RV32IC-NEXT: c.li a0, 0 +; RV32IC-NEXT: lw s0, 2040(sp) +; RV32IC-NEXT: lw ra, 2044(sp) +; RV32IC-NEXT: c.lui a1, 1 +; 
RV32IC-NEXT: addi a1, a1, -2048 +; RV32IC-NEXT: c.add sp, a1 +; RV32IC-NEXT: c.jr ra +; +; RV64I-LABEL: stack_split_always_with_fp: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a0, a0, -2032 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: .cfi_def_cfa_offset 2064 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a0, a0, -2040 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd ra, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a0, a0, -2048 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd s0, 0(a0) +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a0, a0, -2032 +; RV64I-NEXT: add s0, sp, a0 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: lui a0, 1048575 +; RV64I-NEXT: addiw a0, a0, 2044 +; RV64I-NEXT: add a0, s0, a0 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: call use_pointer +; RV64I-NEXT: mv a0, zero +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, -2048 +; RV64I-NEXT: add a1, sp, a1 +; RV64I-NEXT: ld s0, 0(a1) +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, -2040 +; RV64I-NEXT: add a1, sp, a1 +; RV64I-NEXT: ld ra, 0(a1) +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, -2032 +; RV64I-NEXT: add sp, sp, a1 +; RV64I-NEXT: ret +; +; RV64IC-LABEL: stack_split_always_with_fp: +; RV64IC: # %bb.0: # %entry +; RV64IC-NEXT: c.lui a0, 1 +; RV64IC-NEXT: addiw a0, a0, -2032 +; RV64IC-NEXT: sub sp, sp, a0 +; RV64IC-NEXT: .cfi_def_cfa_offset 2064 +; RV64IC-NEXT: c.lui a0, 1 +; RV64IC-NEXT: addiw a0, a0, -2040 +; RV64IC-NEXT: c.add a0, sp +; RV64IC-NEXT: sd ra, 0(a0) +; RV64IC-NEXT: c.lui a0, 1 +; RV64IC-NEXT: addiw a0, a0, -2048 +; RV64IC-NEXT: c.add a0, sp +; RV64IC-NEXT: c.sd s0, 0(a0) +; RV64IC-NEXT: .cfi_offset ra, -8 +; RV64IC-NEXT: .cfi_offset s0, -16 +; RV64IC-NEXT: c.lui a0, 1 +; RV64IC-NEXT: addiw a0, a0, -2032 +; RV64IC-NEXT: add s0, sp, a0 +; RV64IC-NEXT: .cfi_def_cfa s0, 0 +; RV64IC-NEXT: c.lui a0, 1048575 +; RV64IC-NEXT: addiw a0, a0, 2044 +; RV64IC-NEXT: c.add a0, 
s0 +; RV64IC-NEXT: c.mv a0, a0 +; RV64IC-NEXT: call use_pointer +; RV64IC-NEXT: c.li a0, 0 +; RV64IC-NEXT: c.lui a1, 1 +; RV64IC-NEXT: addiw a1, a1, -2048 +; RV64IC-NEXT: c.add a1, sp +; RV64IC-NEXT: c.ld s0, 0(a1) +; RV64IC-NEXT: c.lui a1, 1 +; RV64IC-NEXT: addiw a1, a1, -2040 +; RV64IC-NEXT: c.add a1, sp +; RV64IC-NEXT: ld ra, 0(a1) +; RV64IC-NEXT: c.lui a1, 1 +; RV64IC-NEXT: addiw a1, a1, -2032 +; RV64IC-NEXT: c.add sp, a1 +; RV64IC-NEXT: c.jr ra entry: - %xx = alloca [2024 x i8], align 1 - %0 = getelementptr inbounds [2024 x i8], [2024 x i8]* %xx, i32 0, i32 0 - %call = call i32 @foo(i8* nonnull %0) + %xx = alloca [2028 x i8], align 1 + %0 = getelementptr inbounds [2028 x i8], [2028 x i8]* %xx, i32 0, i32 0 + %call = call i32 @use_pointer(i8* nonnull %0) ret i32 0 } - -declare i32 @foo(i8*)