Index: lib/Target/RISCV/CMakeLists.txt
===================================================================
--- lib/Target/RISCV/CMakeLists.txt
+++ lib/Target/RISCV/CMakeLists.txt
@@ -22,6 +22,7 @@
   RISCVISelDAGToDAG.cpp
   RISCVISelLowering.cpp
   RISCVMCInstLower.cpp
+  RISCVMachineFunctionInfo.cpp
   RISCVMergeBaseOffset.cpp
   RISCVRegisterInfo.cpp
   RISCVSubtarget.cpp
Index: lib/Target/RISCV/RISCVFrameLowering.h
===================================================================
--- lib/Target/RISCV/RISCVFrameLowering.h
+++ lib/Target/RISCV/RISCVFrameLowering.h
@@ -44,6 +44,15 @@
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const override;
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const override;
+  bool
+  restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MI,
+                              std::vector<CalleeSavedInfo> &CSI,
+                              const TargetRegisterInfo *TRI) const override;
 
 protected:
   const RISCVSubtarget &STI;
Index: lib/Target/RISCV/RISCVFrameLowering.cpp
===================================================================
--- lib/Target/RISCV/RISCVFrameLowering.cpp
+++ lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -101,6 +101,11 @@
   unsigned FPReg = getFPReg(STI);
   unsigned SPReg = getSPReg(STI);
 
+  // Since spillCalleeSavedRegisters may have inserted a libcall, skip past
+  // any instructions marked as FrameSetup.
+  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+    ++MBBI;
+
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc DL;
@@ -125,8 +130,8 @@
   // to the stack, not before.
   // FIXME: assumes exactly one instruction is used to save each callee-saved
   // register.
-  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
-  std::advance(MBBI, CSI.size());
+  if (!RVFI->getUseSaveRestoreLibCalls())
+    std::advance(MBBI, MFI.getCalleeSavedInfo().size());
 
   // Generate new FP.
   if (hasFP(MF))
@@ -144,10 +149,18 @@
   unsigned FPReg = getFPReg(STI);
   unsigned SPReg = getSPReg(STI);
 
+  // If callee-saved registers are saved via libcall, place stack adjustment
+  // before this call.
+  while (MBBI != MBB.begin() &&
+         std::prev(MBBI)->getFlag(MachineInstr::FrameDestroy))
+    --MBBI;
+
   // Skip to before the restores of callee-saved registers
   // FIXME: assumes exactly one instruction is used to restore each
   // callee-saved register.
-  auto LastFrameDestroy = std::prev(MBBI, MFI.getCalleeSavedInfo().size());
+  auto LastFrameDestroy = MBBI;
+  if (!RVFI->getUseSaveRestoreLibCalls())
+    LastFrameDestroy = std::prev(MBBI, MFI.getCalleeSavedInfo().size());
 
   uint64_t StackSize = MFI.getStackSize();
 
@@ -295,3 +308,160 @@
 
   return MBB.erase(MI);
 }
+
+// Get the name of the libcall used for spilling callee saved registers.
+// If this function will not use save/restore libcalls, then return nullptr.
+static const char *
+getSpillLibCallName(MachineFunction &MF,
+                    const std::vector<CalleeSavedInfo> &CSI) {
+  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+
+  if (CSI.empty() || !RVFI->getUseSaveRestoreLibCalls())
+    return nullptr;
+
+  using std::max;
+  unsigned MaxReg = 0;
+  for (auto &CS : CSI)
+    MaxReg = std::max(MaxReg, CS.getReg());
+
+  switch (MaxReg) {
+  default:
+    llvm_unreachable("Something has gone wrong!");
+  case /*s11*/ RISCV::X27: return "__riscv_save_12";
+  case /*s10*/ RISCV::X26: return "__riscv_save_11";
+  case /*s9*/ RISCV::X25: return "__riscv_save_10";
+  case /*s8*/ RISCV::X24: return "__riscv_save_9";
+  case /*s7*/ RISCV::X23: return "__riscv_save_8";
+  case /*s6*/ RISCV::X22: return "__riscv_save_7";
+  case /*s5*/ RISCV::X21: return "__riscv_save_6";
+  case /*s4*/ RISCV::X20: return "__riscv_save_5";
+  case /*s3*/ RISCV::X19: return "__riscv_save_4";
+  case /*s2*/ RISCV::X18: return "__riscv_save_3";
+  case /*s1*/ RISCV::X9: return "__riscv_save_2";
+  case /*s0*/ RISCV::X8: return "__riscv_save_1";
+  case /*ra*/ RISCV::X1: return "__riscv_save_0";
+  }
+}
+
+// Get the name of the libcall used for restoring callee saved registers.
+// If this function will not use save/restore libcalls, then return nullptr.
+static const char *
+getRestoreLibCallName(MachineFunction &MF,
+                      const std::vector<CalleeSavedInfo> &CSI) {
+  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+
+  if (CSI.empty() || !RVFI->getUseSaveRestoreLibCalls())
+    return nullptr;
+
+  using std::max;
+  unsigned MaxReg = 0;
+  for (auto &CS : CSI)
+    MaxReg = std::max(MaxReg, CS.getReg());
+
+  switch (MaxReg) {
+  default:
+    llvm_unreachable("Something has gone wrong!");
+  case /*s11*/ RISCV::X27: return "__riscv_restore_12";
+  case /*s10*/ RISCV::X26: return "__riscv_restore_11";
+  case /*s9*/ RISCV::X25: return "__riscv_restore_10";
+  case /*s8*/ RISCV::X24: return "__riscv_restore_9";
+  case /*s7*/ RISCV::X23: return "__riscv_restore_8";
+  case /*s6*/ RISCV::X22: return "__riscv_restore_7";
+  case /*s5*/ RISCV::X21: return "__riscv_restore_6";
+  case /*s4*/ RISCV::X20: return "__riscv_restore_5";
+  case /*s3*/ RISCV::X19: return "__riscv_restore_4";
+  case /*s2*/ RISCV::X18: return "__riscv_restore_3";
+  case /*s1*/ RISCV::X9: return "__riscv_restore_2";
+  case /*s0*/ RISCV::X8: return "__riscv_restore_1";
+  case /*ra*/ RISCV::X1: return "__riscv_restore_0";
+  }
+}
+
+bool RISCVFrameLowering::spillCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    const std::vector<CalleeSavedInfo> &CSI,
+    const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return true;
+
+  MachineFunction *MF = MBB.getParent();
+  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+  DebugLoc DL;
+  if (MI != MBB.end() && !MI->isDebugInstr())
+    DL = MI->getDebugLoc();
+
+  const char *SpillLibCall = getSpillLibCallName(*MF, CSI);
+  if (SpillLibCall) {
+    // Add spill libcall via non-callee-saved register t0.
+    BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5)
+        .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL)
+        .setMIFlag(MachineInstr::FrameSetup);
+
+    // Add registers spilled in libcall as liveins.
+    for (auto &CS : CSI)
+      MBB.addLiveIn(CS.getReg());
+  } else {
+    // Manually spill values not spilled by libcall.
+    for (auto &CS : CSI) {
+      // Insert the spill to the stack frame.
+      unsigned Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, TRI);
+    }
+  }
+
+  return true;
+}
+
+bool RISCVFrameLowering::restoreCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return true;
+
+  MachineFunction *MF = MBB.getParent();
+  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+  DebugLoc DL;
+  if (MI != MBB.end() && !MI->isDebugInstr())
+    DL = MI->getDebugLoc();
+
+  const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
+  if (RestoreLibCall) {
+    // Replace terminating tail calls with a simple call. This is valid because
+    // the return address register is always callee saved as part of the
+    // save/restore libcalls.
+    if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoTAIL) {
+      MachineBasicBlock::iterator NewMI =
+          BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALL))
+              .add(MI->getOperand(0));
+      NewMI->copyImplicitOps(*MF, *MI);
+      MI->eraseFromParent();
+      MI = ++NewMI;
+    }
+
+    // Add restore libcall via tail call.
+    MachineBasicBlock::iterator NewMI =
+        BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL))
+            .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL)
+            .setMIFlag(MachineInstr::FrameDestroy);
+
+    // Remove trailing returns, since the terminator is now a tail call to the
+    // restore function.
+    if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
+      NewMI->copyImplicitOps(*MF, *MI);
+      MI->eraseFromParent();
+    }
+  } else {
+    // Manually restore values not restored by libcall. Insert in reverse
+    // order. loadRegFromStackSlot can insert multiple instructions.
+    for (auto &CS : reverse(CSI)) {
+      unsigned Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
+      assert(MI != MBB.begin() &&
+             "loadRegFromStackSlot didn't insert any code!");
+    }
+  }
+
+  return true;
+}
Index: lib/Target/RISCV/RISCVMachineFunctionInfo.h
===================================================================
--- lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -30,6 +30,8 @@
   /// FrameIndex used for transferring values between 64-bit FPRs and a pair
   /// of 32-bit GPRs via the stack.
   int MoveF64FrameIndex = -1;
+  /// Store whether libcalls are used to save/restore callee-saved registers.
+  Optional<bool> UseSaveRestoreLibCalls = None;
 
 public:
   // RISCVMachineFunctionInfo() = default;
@@ -47,6 +49,14 @@
     MoveF64FrameIndex = MF.getFrameInfo().CreateStackObject(8, 8, false);
     return MoveF64FrameIndex;
   }
+
+  bool useSaveRestoreLibCalls() const;
+
+  bool getUseSaveRestoreLibCalls() {
+    if (!UseSaveRestoreLibCalls)
+      UseSaveRestoreLibCalls = useSaveRestoreLibCalls();
+    return *UseSaveRestoreLibCalls;
+  }
 };
 
 } // end namespace llvm
Index: lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
===================================================================
--- /dev/null
+++ lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
@@ -0,0 +1,41 @@
+//=- RISCVMachineFunctionInfo.cpp - RISCV machine function info ---*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functionality for RISCVMachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMachineFunctionInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableSaveRestore(
+    "enable-save-restore", cl::init(false),
+    cl::desc("Enable save/restore of callee-saved registers via libcalls"));
+
+bool RISCVMachineFunctionInfo::useSaveRestoreLibCalls() const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  if (!EnableSaveRestore)
+    return false;
+
+  // We cannot use fixed locations for the callee saved spill slots if there is
+  // any chance of other objects also requiring fixed locations in the stack
+  // frame. This is called before adding the fixed spill slots so there should
+  // be no fixed objects at all.
+  if (MFI.getNumFixedObjects())
+    return false;
+
+  // If the stack will be adjusted anyway and there is not a significant number
+  // of callee saved registers, it is not beneficial to use a libcall.
+  if (MFI.getNumObjects() > 0 && MFI.getCalleeSavedInfo().size() < 2)
+    return false;
+
+  return true;
+}
Index: lib/Target/RISCV/RISCVRegisterInfo.h
===================================================================
--- lib/Target/RISCV/RISCVRegisterInfo.h
+++ lib/Target/RISCV/RISCVRegisterInfo.h
@@ -35,6 +35,9 @@
 
   const uint32_t *getNoPreservedMask() const override;
 
+  bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+                            int &FrameIdx) const override;
+
   void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
Index: lib/Target/RISCV/RISCVRegisterInfo.cpp
===================================================================
--- lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -12,6 +12,7 @@
 
 #include "RISCVRegisterInfo.h"
 #include "RISCV.h"
+#include "RISCVMachineFunctionInfo.h"
 #include "RISCVSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -80,6 +81,38 @@
   return CSR_NoRegs_RegMask;
 }
 
+// Frame indexes representing locations of CSRs which are given a fixed
+// location by save/restore libcalls.
+static std::map<unsigned, int> FixedCSRFIMap = {
+    {/*ra*/ RISCV::X1, -1},
+    {/*s0*/ RISCV::X8, -2},
+    {/*s1*/ RISCV::X9, -3},
+    {/*s2*/ RISCV::X18, -4},
+    {/*s3*/ RISCV::X19, -5},
+    {/*s4*/ RISCV::X20, -6},
+    {/*s5*/ RISCV::X21, -7},
+    {/*s6*/ RISCV::X22, -8},
+    {/*s7*/ RISCV::X23, -9},
+    {/*s8*/ RISCV::X24, -10},
+    {/*s9*/ RISCV::X25, -11},
+    {/*s10*/ RISCV::X26, -12},
+    {/*s11*/ RISCV::X27, -13}};
+
+bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+                                             unsigned Reg,
+                                             int &FrameIdx) const {
+  const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+  if (!RVFI->useSaveRestoreLibCalls())
+    return false;
+
+  auto FII = FixedCSRFIMap.find(Reg);
+  if (FII == FixedCSRFIMap.end())
+    return false;
+
+  FrameIdx = FII->second;
+  return true;
+}
+
 void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
Index: test/CodeGen/RISCV/saverestore.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/saverestore.ll
@@ -0,0 +1,422 @@
+; RUN: llc -mtriple=riscv32 < %s | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 < %s | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -enable-save-restore < %s | FileCheck %s -check-prefix=RV32I-SR
+; RUN: llc -mtriple=riscv64 -enable-save-restore < %s | FileCheck %s -check-prefix=RV64I-SR
+
+
+; Check that the correct save/restore libcalls are generated.
+
+@var0 = global [18 x i32] zeroinitializer
+@var1 = global [24 x i32] zeroinitializer
+@var2 = global [30 x i32] zeroinitializer
+
+define void @callee_saved0() nounwind {
+; RV32I-LABEL: callee_saved0:
+; RV32I: addi sp, sp, -32
+; RV32I-NEXT: sw s0, 28(sp)
+; RV32I-NEXT: sw s1, 24(sp)
+; RV32I-NEXT: sw s2, 20(sp)
+; RV32I-NEXT: sw s3, 16(sp)
+; RV32I-NEXT: sw s4, 12(sp)
+; RV32I: lw s4, 12(sp)
+; RV32I-NEXT: lw s3, 16(sp)
+; RV32I-NEXT: lw s2, 20(sp)
+; RV32I-NEXT: lw s1, 24(sp)
+; RV32I-NEXT: lw s0, 28(sp)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: callee_saved0:
+; RV64I: addi sp, sp, -48
+; RV64I-NEXT: sd s0, 40(sp)
+; RV64I-NEXT: sd s1, 32(sp)
+; RV64I-NEXT: sd s2, 24(sp)
+; RV64I-NEXT: sd s3, 16(sp)
+; RV64I: ld s4, 8(sp)
+; RV64I-NEXT: ld s3, 16(sp)
+; RV64I-NEXT: ld s2, 24(sp)
+; RV64I-NEXT: ld s1, 32(sp)
+; RV64I-NEXT: ld s0, 40(sp)
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+;
+; RV32I-SR-LABEL: callee_saved0:
+; RV32I-SR: call t0, __riscv_save_5
+; RV32I-SR: tail __riscv_restore_5
+;
+; RV64I-SR-LABEL: callee_saved0:
+; RV64I-SR: call t0, __riscv_save_5
+; RV64I-SR: tail __riscv_restore_5
+  %val = load [18 x i32], [18 x i32]* @var0
+  store volatile [18 x i32] %val, [18 x i32]* @var0
+  ret void
+}
+
+define void @callee_saved1() nounwind {
+; RV32I-LABEL: callee_saved1:
+; RV32I: addi sp, sp, -48
+; RV32I-NEXT: sw s0, 44(sp)
+; RV32I-NEXT: sw s1, 40(sp)
+; RV32I-NEXT: sw s2, 36(sp)
+; RV32I-NEXT: sw s3, 32(sp)
+; RV32I-NEXT: sw s4, 28(sp)
+; RV32I-NEXT: sw s5, 24(sp)
+; RV32I-NEXT: sw s6, 20(sp)
+; RV32I-NEXT: sw s7, 16(sp)
+; RV32I-NEXT: sw s8, 12(sp)
+; RV32I-NEXT: sw s9, 8(sp)
+; RV32I-NEXT: sw s10, 4(sp)
+; RV32I: lw s10, 4(sp)
+; RV32I-NEXT: lw s9, 8(sp)
+; RV32I-NEXT: lw s8, 12(sp)
+; RV32I-NEXT: lw s7, 16(sp)
+; RV32I-NEXT: lw s6, 20(sp)
+; RV32I-NEXT: lw s5, 24(sp)
+; RV32I-NEXT: lw s4, 28(sp)
+; RV32I-NEXT: lw s3, 32(sp)
+; RV32I-NEXT: lw s2, 36(sp)
+; RV32I-NEXT: lw s1, 40(sp)
+; RV32I-NEXT: lw s0, 44(sp)
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: callee_saved1:
+; RV64I: addi sp, sp, -96
+; RV64I-NEXT: sd s0, 88(sp)
+; RV64I-NEXT: sd s1, 80(sp)
+; RV64I-NEXT: sd s2, 72(sp)
+; RV64I-NEXT: sd s3, 64(sp)
+; RV64I-NEXT: sd s4, 56(sp)
+; RV64I-NEXT: sd s5, 48(sp)
+; RV64I-NEXT: sd s6, 40(sp)
+; RV64I-NEXT: sd s7, 32(sp)
+; RV64I-NEXT: sd s8, 24(sp)
+; RV64I-NEXT: sd s9, 16(sp)
+; RV64I-NEXT: sd s10, 8(sp)
+; RV64I: ld s10, 8(sp)
+; RV64I-NEXT: ld s9, 16(sp)
+; RV64I-NEXT: ld s8, 24(sp)
+; RV64I-NEXT: ld s7, 32(sp)
+; RV64I-NEXT: ld s6, 40(sp)
+; RV64I-NEXT: ld s5, 48(sp)
+; RV64I-NEXT: ld s4, 56(sp)
+; RV64I-NEXT: ld s3, 64(sp)
+; RV64I-NEXT: ld s2, 72(sp)
+; RV64I-NEXT: ld s1, 80(sp)
+; RV64I-NEXT: ld s0, 88(sp)
+; RV64I-NEXT: addi sp, sp, 96
+; RV64I-NEXT: ret
+;
+; RV32I-SR-LABEL: callee_saved1:
+; RV32I-SR: call t0, __riscv_save_11
+; RV32I-SR: tail __riscv_restore_11
+;
+; RV64I-SR-LABEL: callee_saved1:
+; RV64I-SR: call t0, __riscv_save_11
+; RV64I-SR: tail __riscv_restore_11
+  %val = load [24 x i32], [24 x i32]* @var1
+  store volatile [24 x i32] %val, [24 x i32]* @var1
+  ret void
+}
+
+define void @callee_saved2() nounwind {
+; RV32I-LABEL: callee_saved2:
+; RV32I: addi sp, sp, -64
+; RV32I-NEXT: sw s0, 60(sp)
+; RV32I-NEXT: sw s1, 56(sp)
+; RV32I-NEXT: sw s2, 52(sp)
+; RV32I-NEXT: sw s3, 48(sp)
+; RV32I-NEXT: sw s4, 44(sp)
+; RV32I-NEXT: sw s5, 40(sp)
+; RV32I-NEXT: sw s6, 36(sp)
+; RV32I-NEXT: sw s7, 32(sp)
+; RV32I-NEXT: sw s8, 28(sp)
+; RV32I-NEXT: sw s9, 24(sp)
+; RV32I-NEXT: sw s10, 20(sp)
+; RV32I-NEXT: sw s11, 16(sp)
+; RV32I: lw s11, 16(sp)
+; RV32I-NEXT: lw s10, 20(sp)
+; RV32I-NEXT: lw s9, 24(sp)
+; RV32I-NEXT: lw s8, 28(sp)
+; RV32I-NEXT: lw s7, 32(sp)
+; RV32I-NEXT: lw s6, 36(sp)
+; RV32I-NEXT: lw s5, 40(sp)
+; RV32I-NEXT: lw s4, 44(sp)
+; RV32I-NEXT: lw s3, 48(sp)
+; RV32I-NEXT: lw s2, 52(sp)
+; RV32I-NEXT: lw s1, 56(sp)
+; RV32I-NEXT: lw s0, 60(sp)
+; RV32I-NEXT: addi sp, sp, 64
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: callee_saved2:
+; RV64I: addi sp, sp, -128
+; RV64I-NEXT: sd s0, 120(sp)
+; RV64I-NEXT: sd s1, 112(sp)
+; RV64I-NEXT: sd s2, 104(sp)
+; RV64I-NEXT: sd s3, 96(sp)
+; RV64I-NEXT: sd s4, 88(sp)
+; RV64I-NEXT: sd s5, 80(sp)
+; RV64I-NEXT: sd s6, 72(sp)
+; RV64I-NEXT: sd s7, 64(sp)
+; RV64I-NEXT: sd s8, 56(sp)
+; RV64I-NEXT: sd s9, 48(sp)
+; RV64I-NEXT: sd s10, 40(sp)
+; RV64I-NEXT: sd s11, 32(sp)
+; RV64I: ld s11, 32(sp)
+; RV64I-NEXT: ld s10, 40(sp)
+; RV64I-NEXT: ld s9, 48(sp)
+; RV64I-NEXT: ld s8, 56(sp)
+; RV64I-NEXT: ld s7, 64(sp)
+; RV64I-NEXT: ld s6, 72(sp)
+; RV64I-NEXT: ld s5, 80(sp)
+; RV64I-NEXT: ld s4, 88(sp)
+; RV64I-NEXT: ld s3, 96(sp)
+; RV64I-NEXT: ld s2, 104(sp)
+; RV64I-NEXT: ld s1, 112(sp)
+; RV64I-NEXT: ld s0, 120(sp)
+; RV64I-NEXT: addi sp, sp, 128
+; RV64I-NEXT: ret
+;
+; RV32I-SR-LABEL: callee_saved2:
+; RV32I-SR: call t0, __riscv_save_12
+; RV32I-SR: tail __riscv_restore_12
+;
+; RV64I-SR-LABEL: callee_saved2:
+; RV64I-SR: call t0, __riscv_save_12
+; RV64I-SR: tail __riscv_restore_12
+  %val = load [30 x i32], [30 x i32]* @var2
+  store volatile [30 x i32] %val, [30 x i32]* @var2
+  ret void
+}
+
+; Check that tail calls are updated correctly by save/restore
+
+declare i32 @tail_callee(i32 %i)
+
+define i32 @tail_call(i32 %i) nounwind {
+; RV32I-LABEL: tail_call:
+; RV32I: addi sp, sp, -32
+; RV32I-NEXT: sw s0, 28(sp)
+; RV32I-NEXT: sw s1, 24(sp)
+; RV32I-NEXT: sw s2, 20(sp)
+; RV32I-NEXT: sw s3, 16(sp)
+; RV32I-NEXT: sw s4, 12(sp)
+; RV32I-NEXT: sw s5, 8(sp)
+; RV32I: lw s5, 8(sp)
+; RV32I-NEXT: lw s4, 12(sp)
+; RV32I-NEXT: lw s3, 16(sp)
+; RV32I-NEXT: lw s2, 20(sp)
+; RV32I-NEXT: lw s1, 24(sp)
+; RV32I-NEXT: lw s0, 28(sp)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: tail tail_callee
+;
+; RV64I-LABEL: tail_call:
+; RV64I: addi sp, sp, -48
+; RV64I-NEXT: sd s0, 40(sp)
+; RV64I-NEXT: sd s1, 32(sp)
+; RV64I-NEXT: sd s2, 24(sp)
+; RV64I-NEXT: sd s3, 16(sp)
+; RV64I-NEXT: sd s4, 8(sp)
+; RV64I-NEXT: sd s5, 0(sp)
+; RV64I: ld s5, 0(sp)
+; RV64I-NEXT: ld s4, 8(sp)
+; RV64I-NEXT: ld s3, 16(sp)
+; RV64I-NEXT: ld s2, 24(sp)
+; RV64I-NEXT: ld s1, 32(sp)
+; RV64I-NEXT: ld s0, 40(sp)
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: tail tail_callee
+;
+; RV32I-SR-LABEL: tail_call:
+; RV32I-SR: call t0, __riscv_save_6
+; RV32I-SR: call tail_callee
+; RV32I-SR-NEXT: tail __riscv_restore_6
+;
+; RV64I-SR-LABEL: tail_call:
+; RV64I-SR: call t0, __riscv_save_6
+; RV64I-SR: call tail_callee
+; RV64I-SR-NEXT: tail __riscv_restore_6
+entry:
+  %val = load [18 x i32], [18 x i32]* @var0
+  store volatile [18 x i32] %val, [18 x i32]* @var0
+  %r = tail call i32 @tail_callee(i32 %i)
+  ret i32 %r
+}
+
+; Check that functions with varargs do not use save/restore code
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+
+define i32 @varargs(i8* %fmt, ...) nounwind {
+; RV32I-LABEL: varargs:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: sw a7, 44(sp)
+; RV32I-NEXT: sw a6, 40(sp)
+; RV32I-NEXT: sw a5, 36(sp)
+; RV32I-NEXT: sw a4, 32(sp)
+; RV32I-NEXT: sw a3, 28(sp)
+; RV32I-NEXT: sw a2, 24(sp)
+; RV32I-NEXT: addi a1, sp, 24
+; RV32I-NEXT: sw a1, 12(sp)
+; RV32I-NEXT: sw a0, 20(sp)
+; RV32I-NEXT: addi sp, sp, 48
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: varargs:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -80
+; RV64I-NEXT: sd a1, 24(sp)
+; RV64I-NEXT: sd a7, 72(sp)
+; RV64I-NEXT: sd a6, 64(sp)
+; RV64I-NEXT: sd a5, 56(sp)
+; RV64I-NEXT: sd a4, 48(sp)
+; RV64I-NEXT: sd a3, 40(sp)
+; RV64I-NEXT: sd a2, 32(sp)
+; RV64I-NEXT: addi a0, sp, 24
+; RV64I-NEXT: ori a0, a0, 4
+; RV64I-NEXT: sd a0, 8(sp)
+; RV64I-NEXT: lw a0, 24(sp)
+; RV64I-NEXT: addi sp, sp, 80
+; RV64I-NEXT: ret
+;
+; RV32I-SR-LABEL: varargs:
+; RV32I-SR: # %bb.0:
+; RV32I-SR-NEXT: addi sp, sp, -48
+; RV32I-SR-NEXT: mv a0, a1
+; RV32I-SR-NEXT: sw a7, 44(sp)
+; RV32I-SR-NEXT: sw a6, 40(sp)
+; RV32I-SR-NEXT: sw a5, 36(sp)
+; RV32I-SR-NEXT: sw a4, 32(sp)
+; RV32I-SR-NEXT: sw a3, 28(sp)
+; RV32I-SR-NEXT: sw a2, 24(sp)
+; RV32I-SR-NEXT: addi a1, sp, 24
+; RV32I-SR-NEXT: sw a1, 12(sp)
+; RV32I-SR-NEXT: sw a0, 20(sp)
+; RV32I-SR-NEXT: addi sp, sp, 48
+; RV32I-SR-NEXT: ret
+;
+; RV64I-SR-LABEL: varargs:
+; RV64I-SR: # %bb.0:
+; RV64I-SR-NEXT: addi sp, sp, -80
+; RV64I-SR-NEXT: sd a1, 24(sp)
+; RV64I-SR-NEXT: sd a7, 72(sp)
+; RV64I-SR-NEXT: sd a6, 64(sp)
+; RV64I-SR-NEXT: sd a5, 56(sp)
+; RV64I-SR-NEXT: sd a4, 48(sp)
+; RV64I-SR-NEXT: sd a3, 40(sp)
+; RV64I-SR-NEXT: sd a2, 32(sp)
+; RV64I-SR-NEXT: addi a0, sp, 24
+; RV64I-SR-NEXT: ori a0, a0, 4
+; RV64I-SR-NEXT: sd a0, 8(sp)
+; RV64I-SR-NEXT: lw a0, 24(sp)
+; RV64I-SR-NEXT: addi sp, sp, 80
+; RV64I-SR-NEXT: ret
+  %va = alloca i8*, align 4
+  %1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %1)
+  %argp.cur = load i8*, i8** %va, align 4
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
+  store i8* %argp.next, i8** %va, align 4
+  %2 = bitcast i8* %argp.cur to i32*
+  %3 = load i32, i32* %2, align 4
+  call void @llvm.va_end(i8* %1)
+  ret i32 %3
+}
+
+; Check that dynamic allocation calculations remain correct
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+declare void @notdead(i8*)
+
+define void @alloca(i32 %n) nounwind {
+; RV32I-LABEL: alloca:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp)
+; RV32I-NEXT: sw s0, 8(sp)
+; RV32I-NEXT: sw s1, 4(sp)
+; RV32I-NEXT: addi s0, sp, 16
+; RV32I-NEXT: mv s1, sp
+; RV32I-NEXT: addi a0, a0, 15
+; RV32I-NEXT: andi a0, a0, -16
+; RV32I-NEXT: sub a0, sp, a0
+; RV32I-NEXT: mv sp, a0
+; RV32I-NEXT: call notdead
+; RV32I-NEXT: mv sp, s1
+; RV32I-NEXT: addi sp, s0, -16
+; RV32I-NEXT: lw s1, 4(sp)
+; RV32I-NEXT: lw s0, 8(sp)
+; RV32I-NEXT: lw ra, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: alloca:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp)
+; RV64I-NEXT: sd s0, 16(sp)
+; RV64I-NEXT: sd s1, 8(sp)
+; RV64I-NEXT: addi s0, sp, 32
+; RV64I-NEXT: addi a1, zero, 1
+; RV64I-NEXT: slli a1, a1, 33
+; RV64I-NEXT: addi a1, a1, -16
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: addi a0, a0, 15
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: mv s1, sp
+; RV64I-NEXT: sub a0, sp, a0
+; RV64I-NEXT: mv sp, a0
+; RV64I-NEXT: call notdead
+; RV64I-NEXT: mv sp, s1
+; RV64I-NEXT: addi sp, s0, -32
+; RV64I-NEXT: ld s1, 8(sp)
+; RV64I-NEXT: ld s0, 16(sp)
+; RV64I-NEXT: ld ra, 24(sp)
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+;
+; RV32I-SR-LABEL: alloca:
+; RV32I-SR: # %bb.0:
+; RV32I-SR-NEXT: call t0, __riscv_save_2
+; RV32I-SR-NEXT: mv s0, sp
+; RV32I-SR-NEXT: mv s1, sp
+; RV32I-SR-NEXT: addi a0, a0, 15
+; RV32I-SR-NEXT: andi a0, a0, -16
+; RV32I-SR-NEXT: sub a0, sp, a0
+; RV32I-SR-NEXT: mv sp, a0
+; RV32I-SR-NEXT: call notdead
+; RV32I-SR-NEXT: mv sp, s1
+; RV32I-SR-NEXT: mv sp, s0
+; RV32I-SR-NEXT: tail __riscv_restore_2
+;
+; RV64I-SR-LABEL: alloca:
+; RV64I-SR: # %bb.0:
+; RV64I-SR-NEXT: call t0, __riscv_save_2
+; RV64I-SR-NEXT: mv s0, sp
+; RV64I-SR-NEXT: addi a1, zero, 1
+; RV64I-SR-NEXT: slli a1, a1, 33
+; RV64I-SR-NEXT: addi a1, a1, -16
+; RV64I-SR-NEXT: slli a0, a0, 32
+; RV64I-SR-NEXT: srli a0, a0, 32
+; RV64I-SR-NEXT: addi a0, a0, 15
+; RV64I-SR-NEXT: and a0, a0, a1
+; RV64I-SR-NEXT: mv s1, sp
+; RV64I-SR-NEXT: sub a0, sp, a0
+; RV64I-SR-NEXT: mv sp, a0
+; RV64I-SR-NEXT: call notdead
+; RV64I-SR-NEXT: mv sp, s1
+; RV64I-SR-NEXT: mv sp, s0
+; RV64I-SR-NEXT: tail __riscv_restore_2
+  %sp = call i8* @llvm.stacksave()
+  %addr = alloca i8, i32 %n
+  call void @notdead(i8* %addr)
+  call void @llvm.stackrestore(i8* %sp)
+  ret void
+}
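
For context, the __riscv_save_N and __riscv_restore_N routines called by the generated code are not part of this patch; they are expected to be provided by the runtime library (for example, libgcc's RISC-V save/restore routines). The sketch below is illustrative only, not the library implementation: it shows what a standalone RV32 __riscv_save_2/__riscv_restore_2 pair could look like under the conventions the patch relies on, namely that the save routine is reached via "call t0, ..." so it can spill ra itself, and that the restore routine is reached via a tail call and returns on the function's behalf. The slot layout matches the fixed frame indexes used above (ra nearest the incoming sp, then s0, then s1); real library versions typically chain entry points instead of duplicating code.

__riscv_save_2:                 # illustrative sketch only
        addi    sp, sp, -16     # one 16-byte aligned block for ra, s0, s1
        sw      s1, 4(sp)
        sw      s0, 8(sp)
        sw      ra, 12(sp)
        jr      t0              # resume the caller's prologue; its return point is in t0

__riscv_restore_2:              # illustrative sketch only
        lw      s1, 4(sp)
        lw      s0, 8(sp)
        lw      ra, 12(sp)
        addi    sp, sp, 16
        ret                     # return to the original caller on behalf of the function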