diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -37,6 +37,8 @@ RISCVMacroFusion.cpp RISCVMCInstLower.cpp RISCVMergeBaseOffset.cpp + RISCVMoveOptimizer.cpp + RISCVPushPopOptimizer.cpp RISCVRedundantCopyElimination.cpp RISCVRegisterBankInfo.cpp RISCVRegisterInfo.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -68,6 +68,12 @@ FunctionPass *createRISCVRedundantCopyEliminationPass(); void initializeRISCVRedundantCopyEliminationPass(PassRegistry &); +FunctionPass *createRISCVMoveOptimizationPass(); +void initializeRISCVMoveOptPass(PassRegistry &); + +FunctionPass *createRISCVPushPopOptimizationPass(); +void initializeRISCVPushPopOptPass(PassRegistry &); + InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, RISCVSubtarget &, RISCVRegisterBankInfo &); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -46,6 +46,8 @@ bool hasBP(const MachineFunction &MF) const; + bool isCSIpushable(const std::vector &CSI) const; + bool hasReservedCallFrame(const MachineFunction &MF) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -222,6 +222,97 @@ return RestoreLibCalls[LibCallID]; } +// Return encoded value for PUSH/POP instruction, representing +// registers to store/load. 
+static int getPushPopEncoding(const Register MaxReg) {
+  switch (MaxReg) {
+  default: // s10 (X26) has no encoding; callers widen it to s11 (X27).
+    llvm_unreachable("Something has gone wrong!");
+  case /*s11*/ RISCV::X27:
+    return 15;
+  case /*s9*/ RISCV::X25:
+    return 14;
+  case /*s8*/ RISCV::X24:
+    return 13;
+  case /*s7*/ RISCV::X23:
+    return 12;
+  case /*s6*/ RISCV::X22:
+    return 11;
+  case /*s5*/ RISCV::X21:
+    return 10;
+  case /*s4*/ RISCV::X20:
+    return 9;
+  case /*s3*/ RISCV::X19:
+    return 8;
+  case /*s2*/ RISCV::X18:
+    return 7;
+  case /*s1*/ RISCV::X9:
+    return 6;
+  case /*s0*/ RISCV::X8:
+    return 5;
+  case /*ra*/ RISCV::X1:
+    return 4;
+  }
+}
+
+void reallocPushStackFream(MachineFunction &MF) {
+  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  // Stack the callee saves that PUSH does not cover right below its save area.
+  int64_t NonePushStackOffset =
+      -static_cast<int64_t>(RVFI->getRVPushStackSize());
+  for (const auto &Entry : CSI) {
+    Register Reg = Entry.getReg();
+    if (!(Reg == RISCV::X26 || RISCV::PGPRRegClass.contains(Reg))) {
+      NonePushStackOffset -= MFI.getObjectSize(Entry.getFrameIdx());
+      MFI.setObjectOffset(Entry.getFrameIdx(), NonePushStackOffset);
+    }
+  }
+}
+
+static uint64_t adjSPInPushPop(MachineBasicBlock::iterator MBBI,
+                               uint64_t StackAdj, bool isPop) {
+  // The spec allocates 2 bits to specify number of extra 16 byte blocks.
+  uint32_t AvailableAdj = 48;
+  uint64_t RequiredAdj = StackAdj;
+
+  // Use available stack adjustment in Zc PUSH/POP instruction
+  // to allocate/deallocate space on stack.
+  int OpNum = MBBI->getNumOperands();
+  auto &Operand = MBBI->getOperand(OpNum - 1);
+  int RegisterOffset = Operand.getImm();
+  RequiredAdj -= RegisterOffset;
+
+  if (RequiredAdj >= AvailableAdj) {
+    RequiredAdj -= AvailableAdj;
+    StackAdj = AvailableAdj;
+  } else {
+    // Round up to the next 16 byte block able to fit RequiredAdj.
+    StackAdj = alignTo(RequiredAdj, 16);
+    RequiredAdj = 0;
+  }
+  Operand.setImm(StackAdj);
+  MBBI->setFlag(isPop ? MachineInstr::FrameDestroy : MachineInstr::FrameSetup);
+  return RequiredAdj;
+}
+
+// Checks if Zc PUSH/POP instructions can be used with the given CSI.
+bool RISCVFrameLowering::isCSIpushable(
+    const std::vector<CalleeSavedInfo> &CSI) const {
+  if (!STI.hasStdExtZcmp() || CSI.empty())
+    return false;
+  for (auto &CS : CSI) {
+    Register Reg = CS.getReg();
+    const TargetRegisterClass *RC =
+        STI.getRegisterInfo()->getMinimalPhysRegClass(Reg);
+    if (RISCV::PGPRRegClass.hasSubClassEq(RC))
+      return true;
+  }
+  return false;
+}
+
 // Return true if the specified function should have a dedicated frame
 // pointer register. This is true if frame pointer elimination is
 // disabled, if it needs dynamic stack realignment, if the function has
@@ -350,11 +441,11 @@
 // Returns the register used to hold the stack pointer.
 static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; }
 
-static SmallVector<CalleeSavedInfo, 8>
+static std::vector<CalleeSavedInfo>
 getNonLibcallCSI(const MachineFunction &MF,
                  const std::vector<CalleeSavedInfo> &CSI) {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;
+  std::vector<CalleeSavedInfo> NonLibcallCSI;
 
   for (auto &CS : CSI) {
     int FI = CS.getFrameIdx();
@@ -469,8 +560,36 @@
     RealStackSize = FirstSPAdjustAmount;
   }
 
-  // Allocate space on the stack if necessary.
-  adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
+  const auto &CSI = MFI.getCalleeSavedInfo();
+  bool PushEnabled = isCSIpushable(CSI);
+  if (PushEnabled && (CSI.size() != 0)) {
+    // Check at what offset spilling of registers starts and allocate space
+    // before it.
+    int64_t preAdjustStack = 0;
+    for (auto CS : CSI) {
+      preAdjustStack =
+          std::min(preAdjustStack, -(MFI.getObjectOffset(CS.getFrameIdx()) +
+                                     MFI.getObjectSize(CS.getFrameIdx())));
+    }
+    if (preAdjustStack != 0)
+      adjustReg(MBB, MBBI, DL, SPReg, SPReg, -preAdjustStack,
+                MachineInstr::FrameSetup);
+    StackSize -= preAdjustStack;
+
+    // Use available stack adjustment in push instruction to allocate additional
+    // stack space.
+ StackSize = adjSPInPushPop(MBBI, StackSize, false); + if (StackSize != 0) { + adjustReg(MBB, next_nodbg(MBBI, MBB.end()), DL, SPReg, SPReg, -StackSize, + MachineInstr::FrameSetup); + MBBI = next_nodbg(MBBI, MBB.end()); + reallocPushStackFream(MF); + } + } else { + // Allocate space on the stack if necessary. + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, + MachineInstr::FrameSetup); + } // Emit ".cfi_def_cfa_offset RealStackSize" unsigned CFIIndex = MF.addFrameInst( @@ -479,15 +598,16 @@ .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); - const auto &CSI = MFI.getCalleeSavedInfo(); - - // The frame pointer is callee-saved, and code has been generated for us to - // save it to the stack. We need to skip over the storing of callee-saved - // registers as the frame pointer must be modified after it has been saved - // to the stack, not before. - // FIXME: assumes exactly one instruction is used to save each callee-saved - // register. - std::advance(MBBI, getNonLibcallCSI(MF, CSI).size()); + if (PushEnabled) + std::advance(MBBI, 1); + else + // The frame pointer is callee-saved, and code has been generated for us to + // save it to the stack. We need to skip over the storing of callee-saved + // registers as the frame pointer must be modified after it has been saved + // to the stack, not before. + // FIXME: assumes exactly one instruction is used to save each callee-saved + // register. + std::advance(MBBI, getNonLibcallCSI(MF, CSI).size()); // Iterate over list of callee-saved registers and emit .cfi_offset // directives. @@ -628,7 +748,10 @@ // FIXME: assumes exactly one instruction is used to restore each // callee-saved register. 
auto LastFrameDestroy = MBBI; - if (!CSI.empty()) + bool PopEnabled = isCSIpushable(CSI); + if (PopEnabled) + LastFrameDestroy = prev_nodbg(MBBI, MBB.begin()); + else if (!CSI.empty()) LastFrameDestroy = std::prev(MBBI, CSI.size()); uint64_t StackSize = getStackSizeWithRVVPadding(MF); @@ -672,7 +795,31 @@ StackSize = FirstSPAdjustAmount; // Deallocate stack - adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); + if (PopEnabled) { + // Check at what offset spilling of registers starts and calculate space + // before it. + int64_t preAdjustSize = 0; + for (auto CS : CSI) { + preAdjustSize = + std::min(preAdjustSize, -(MFI.getObjectOffset(CS.getFrameIdx()) + + MFI.getObjectSize(CS.getFrameIdx()))); + } + adjustReg(MBB, MBBI, DL, SPReg, SPReg, preAdjustSize, + MachineInstr::FrameDestroy); + StackSize -= preAdjustSize; + if (preAdjustSize != 0) + MBBI = prev_nodbg(MBBI, MBB.begin()); + + // Use available stack adjustment in pop instruction to deallocate stack + // space. + StackSize = adjSPInPushPop(prev_nodbg(MBBI, MBB.begin()), StackSize, true); + if (StackSize != 0) { + adjustReg(MBB, prev_nodbg(MBBI, MBB.begin()), DL, SPReg, SPReg, StackSize, + MachineInstr::FrameDestroy); + } + } else + adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, + MachineInstr::FrameDestroy); // Emit epilogue for shadow call stack. emitSCSEpilogue(MF, MBB, MBBI, DL); @@ -1159,26 +1306,67 @@ if (MI != MBB.end() && !MI->isDebugInstr()) DL = MI->getDebugLoc(); - const char *SpillLibCall = getSpillLibCallName(*MF, CSI); - if (SpillLibCall) { - // Add spill libcall via non-callee-saved register t0. 
- BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5) - .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL) - .setMIFlag(MachineInstr::FrameSetup); + // Emmit CM.PUSH with base SPimm & evaluate Push stack + if (isCSIpushable(CSI.vec())) { + auto *RVFI = MF->getInfo(); + uint64_t PushStackSize = 0; + std::vector NonePushCSI; + Register MaxReg = RISCV::NoRegister; + + for (auto &CS : CSI) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + if (RISCV::PGPRRegClass.hasSubClassEq(RC)) { + if (Reg != RISCV::X27) + PushStackSize += 4; + MaxReg = std::max(MaxReg.id(), Reg.id()); + } else if (Reg.id() == RISCV::X26) { + PushStackSize += 8; + MaxReg = RISCV::X27; + } else + NonePushCSI.push_back(CS); + } + RVFI->setRVPushStackSize(PushStackSize); + + MachineInstrBuilder PushBuilder = + BuildMI(MBB, MI, DL, TII.get(RISCV::CM_PUSH)); + // Use encoded number to represent registers to spill. + int RegEnc = getPushPopEncoding(MaxReg); + PushBuilder.addImm(RegEnc); + // Calculate SpImm Base adjustment, and SpImm field will be updated + // through adjSPInPushPop. + bool isRV64 = STI.getFeatureBits()[RISCV::Feature64Bit]; + bool isEABI = false; // Reserved for future implementation + uint32_t SpImmBase = RISCVZC::getStackAdjBase(RegEnc, isRV64, isEABI); + PushBuilder.addImm(SpImmBase); + + for (auto &CS : NonePushCSI) { + Register Reg = CS.getReg(); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), + TRI->getMinimalPhysRegClass(Reg), TRI); + } + } else { + const char *SpillLibCall = getSpillLibCallName(*MF, CSI); + if (SpillLibCall) { + // Add spill libcall via non-callee-saved register t0. + BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5) + .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL) + .setMIFlag(MachineInstr::FrameSetup); - // Add registers spilled in libcall as liveins. - for (auto &CS : CSI) - MBB.addLiveIn(CS.getReg()); - } + // Add registers spilled in libcall as liveins. 
+ for (auto &CS : CSI) + MBB.addLiveIn(CS.getReg()); + } - // Manually spill values not spilled by libcall. - const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : NonLibcallCSI) { - // Insert the spill to the stack frame. - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), - RC, TRI); + // Manually spill values not spilled by libcall. + const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); + for (auto &CS : NonLibcallCSI) { + // Insert the spill to the stack frame. + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), + CS.getFrameIdx(), RC, TRI); + } } return true; @@ -1196,36 +1384,63 @@ if (MI != MBB.end() && !MI->isDebugInstr()) DL = MI->getDebugLoc(); - // Manually restore values not restored by libcall. - // Keep the same order as in the prologue. There is no need to reverse the - // order in the epilogue. In addition, the return address will be restored - // first in the epilogue. It increases the opportunity to avoid the - // load-to-use data hazard between loading RA and return by RA. - // loadRegFromStackSlot can insert multiple instructions. 
- const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : NonLibcallCSI) { - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); - assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); - } + if (isCSIpushable(CSI.vec())) { + Register MaxReg = RISCV::NoRegister; + + for (auto &CS : reverse(CSI)) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + if (RISCV::PGPRRegClass.hasSubClassEq(RC)) + MaxReg = std::max(MaxReg.id(), Reg.id()); + else if (Reg.id() == RISCV::X26) { + MaxReg = RISCV::X27; + } else + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); + } - const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI); - if (RestoreLibCall) { - // Add restore libcall via tail call. - MachineBasicBlock::iterator NewMI = - BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL)) - .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL) - .setMIFlag(MachineInstr::FrameDestroy); - - // Remove trailing returns, since the terminator is now a tail call to the - // restore function. - if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) { - NewMI->copyImplicitOps(*MF, *MI); - MI->eraseFromParent(); + MachineInstrBuilder PopBuilder = + BuildMI(MBB, MI, DL, TII.get(RISCV::CM_POP)); + // Use encoded number to represent registers to restore. + int RegEnc = getPushPopEncoding(MaxReg); + PopBuilder.addImm(RegEnc); + // Calculate SpImm Base adjustment, and SpImm field will be updated + // through adjSPInPushPop. + bool isRV64 = STI.getFeatureBits()[RISCV::Feature64Bit]; + bool isEABI = false; // Reserved for future implementation + uint32_t SpImmBase = RISCVZC::getStackAdjBase(RegEnc, isRV64, isEABI); + PopBuilder.addImm(SpImmBase); + } else { + // Manually restore values not restored by libcall. + // Keep the same order as in the prologue. 
There is no need to reverse the + // order in the epilogue. In addition, the return address will be restored + // first in the epilogue. It increases the opportunity to avoid the + // load-to-use data hazard between loading RA and return by RA. + // loadRegFromStackSlot can insert multiple instructions. + const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); + for (auto &CS : NonLibcallCSI) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); + assert(MI != MBB.begin() && + "loadRegFromStackSlot didn't insert any code!"); } - } + const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI); + if (RestoreLibCall) { + // Add restore libcall via tail call. + MachineBasicBlock::iterator NewMI = + BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL)) + .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL) + .setMIFlag(MachineInstr::FrameDestroy); + + // Remove trailing returns, since the terminator is now a tail call to the + // restore function. + if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) { + NewMI->copyImplicitOps(*MF, *MI); + MI->eraseFromParent(); + } + } + } return true; } diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -63,6 +63,8 @@ uint64_t RVVPadding = 0; /// Size of stack frame to save callee saved registers unsigned CalleeSavedStackSize = 0; + /// Size of stack frame for zcmp PUSH/POP + unsigned RVPushStackSize = 0; public: RISCVMachineFunctionInfo(const MachineFunction &MF) {} @@ -93,7 +95,8 @@ // function uses a varargs save area, or is an interrupt handler. 
return MF.getSubtarget().enableSaveRestore() && VarArgsSaveSize == 0 && !MF.getFrameInfo().hasTailCall() && - !MF.getFunction().hasFnAttribute("interrupt"); + !MF.getFunction().hasFnAttribute("interrupt") && + !MF.getSubtarget().hasStdExtZcmp(); } uint64_t getRVVStackSize() const { return RVVStackSize; } @@ -108,6 +111,9 @@ unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } + uint64_t getRVPushStackSize() const { return RVPushStackSize; } + void setRVPushStackSize(uint64_t Size) { RVPushStackSize = Size; } + void initializeBaseYamlFields(const yaml::RISCVMachineFunctionInfo &YamlMFI); }; diff --git a/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp @@ -0,0 +1,252 @@ +//===---------- RISCVMoveOptimizer.cpp - RISCV move opt. pass -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that performs move related peephole +// optimizations. This pass should be run after register allocation. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_MOVE_OPT_NAME "RISC-V Zc move merging pass"
+
+namespace {
+struct RISCVMoveOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVMoveOpt() : MachineFunctionPass(ID) {
+    initializeRISCVMoveOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  const RISCVInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const RISCVSubtarget *Subtarget;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+  bool isCandidateToMergeMVA01S(DestSourcePair &RegPair);
+  bool isCandidateToMergeMVSA01(DestSourcePair &RegPair);
+  // Merge the two instructions indicated into a single pair instruction.
+  MachineBasicBlock::iterator
+  mergePairedInsns(MachineBasicBlock::iterator I,
+                   MachineBasicBlock::iterator Paired, unsigned Opcode);
+
+  // Look for C.MV instruction that can be combined with
+  // the given instruction into CM.MVA01S or CM.MVSA01. Return the matching
+  // instruction if one exists.
+  MachineBasicBlock::iterator
+  findMatchingInst(MachineBasicBlock::iterator &MBBI, unsigned InstOpcode);
+  bool MovOpt(MachineBasicBlock &MBB);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override { return RISCV_MOVE_OPT_NAME; }
+};
+
+char RISCVMoveOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVMoveOpt, "riscv-mov-opt", RISCV_MOVE_OPT_NAME, false,
+                false)
+
+// Check if registers meet CM.MVA01S constraints.
+bool RISCVMoveOpt::isCandidateToMergeMVA01S(DestSourcePair &RegPair) {
+  Register Destination = RegPair.Destination->getReg();
+  Register Source = RegPair.Source->getReg();
+  const TargetRegisterClass *SourceRC = TRI->getMinimalPhysRegClass(Source);
+  // The destination must be a0 or a1 and the source one of s0 - s7.
+  if (Destination == RISCV::X10 || Destination == RISCV::X11)
+    if (RISCV::SR07RegClass.hasSubClassEq(SourceRC))
+      return true;
+  return false;
+}
+
+// Check if registers meet CM.MVSA01 constraints.
+bool RISCVMoveOpt::isCandidateToMergeMVSA01(DestSourcePair &RegPair) {
+  Register Destination = RegPair.Destination->getReg();
+  Register Source = RegPair.Source->getReg();
+  const TargetRegisterClass *DestinationRC =
+      TRI->getMinimalPhysRegClass(Destination);
+  // The destination must be one of s0 - s7 and the source a0 or a1.
+  if (RISCV::SR07RegClass.hasSubClassEq(DestinationRC))
+    if (Source == RISCV::X10 || Source == RISCV::X11)
+      return true;
+  return false;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+                               MachineBasicBlock::iterator Paired,
+                               unsigned Opcode) {
+  const MachineOperand *Sreg1, *Sreg2;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+  DestSourcePair FirstPair = TII->isCopyInstrImpl(*I).value();
+  DestSourcePair PairedRegs = TII->isCopyInstrImpl(*Paired).value();
+  Register ARegInFirstPair = Opcode == RISCV::CM_MVA01S
+                                 ? FirstPair.Destination->getReg()
+                                 : FirstPair.Source->getReg();
+
+  if (NextI == Paired)
+    NextI = next_nodbg(NextI, E);
+  DebugLoc DL = I->getDebugLoc();
+
+  // The order of S-reg depends on which instruction holds A0, instead of
+  // the order of register pair.
+  // e.g.
+  //   mv a1, s1
+  //   mv a0, s2    =>  cm.mva01s s2,s1
+  //
+  //   mv a0, s2
+  //   mv a1, s1    =>  cm.mva01s s2,s1
+  if (Opcode == RISCV::CM_MVA01S) {
+    Sreg1 =
+        ARegInFirstPair == RISCV::X10 ? FirstPair.Source : PairedRegs.Source;
+    Sreg2 =
+        ARegInFirstPair == RISCV::X10 ? PairedRegs.Source : FirstPair.Source;
+  } else {
+    Sreg1 = ARegInFirstPair == RISCV::X10 ? FirstPair.Destination
+                                          : PairedRegs.Destination;
+    Sreg2 = ARegInFirstPair == RISCV::X10 ? PairedRegs.Destination
+                                          : FirstPair.Destination;
+  }
+
+  BuildMI(*I->getParent(), I, DL, TII->get(Opcode)).add(*Sreg1).add(*Sreg2);
+
+  I->eraseFromParent();
+  Paired->eraseFromParent();
+  return NextI;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::findMatchingInst(MachineBasicBlock::iterator &MBBI,
+                               unsigned InstOpcode) {
+  MachineBasicBlock::iterator E = MBBI->getParent()->end();
+  DestSourcePair FirstPair = TII->isCopyInstrImpl(*MBBI).value();
+
+  // Track which register units have been modified and used between the first
+  // insn and the second insn.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  for (MachineBasicBlock::iterator I = next_nodbg(MBBI, E); I != E;
+       I = next_nodbg(I, E)) {
+
+    MachineInstr &MI = *I;
+
+    if (auto SecondPair = TII->isCopyInstrImpl(MI)) {
+      Register SourceReg = SecondPair->Source->getReg();
+      Register DestReg = SecondPair->Destination->getReg();
+
+      if (InstOpcode == RISCV::CM_MVA01S &&
+          isCandidateToMergeMVA01S(*SecondPair)) {
+        // Both moves define the same a-register, so they cannot be paired.
+        if ((FirstPair.Destination->getReg() == DestReg))
+          return E;
+
+        // If paired destination register was modified or used, there is no
+        // possibility of finding matching instruction so exit early.
+        if (!ModifiedRegUnits.available(DestReg) ||
+            !UsedRegUnits.available(DestReg))
+          return E;
+
+        // We need to check if the source register in the second paired
+        // instruction is defined in between.
+        if (ModifiedRegUnits.available(SourceReg))
+          return I;
+
+      } else if (InstOpcode == RISCV::CM_MVSA01 &&
+                 isCandidateToMergeMVSA01(*SecondPair)) {
+        if ((FirstPair.Source->getReg() == SourceReg) ||
+            (FirstPair.Destination->getReg() == DestReg))
+          return E;
+
+        if (!ModifiedRegUnits.available(SourceReg) ||
+            !UsedRegUnits.available(SourceReg))
+          return E;
+
+        // The second move's definition is hoisted up to the first move, so
+        // its destination must be neither read nor redefined in between.
+        if (ModifiedRegUnits.available(DestReg) &&
+            UsedRegUnits.available(DestReg))
+          return I;
+      }
+    }
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+  }
+  return E;
+}
+
+// Finds instructions, which could be represented as C.MV instructions and
+// merged into CM.MVA01S or CM.MVSA01.
+bool RISCVMoveOpt::MovOpt(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+       MBBI != E;) {
+    // Check if the instruction can be compressed to C.MV instruction. If it
+    // can, return Dest/Src register pair.
+    auto RegPair = TII->isCopyInstrImpl(*MBBI);
+    if (RegPair.has_value()) {
+      unsigned Opcode = 0;
+
+      if (isCandidateToMergeMVA01S(*RegPair))
+        Opcode = RISCV::CM_MVA01S;
+      else if (isCandidateToMergeMVSA01(*RegPair))
+        Opcode = RISCV::CM_MVSA01;
+      else {
+        ++MBBI;
+        continue;
+      }
+
+      MachineBasicBlock::iterator Paired = findMatchingInst(MBBI, Opcode);
+      // If matching instruction could be found merge them.
+      if (Paired != E) {
+        MBBI = mergePairedInsns(MBBI, Paired, Opcode);
+        Modified = true;
+        continue;
+      }
+    }
+    ++MBBI;
+  }
+  return Modified;
+}
+
+bool RISCVMoveOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  Subtarget = &static_cast<const RISCVSubtarget &>(Fn.getSubtarget());
+  if (!Subtarget->hasStdExtZcmp()) {
+    return false;
+  }
+
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Resize the modified and used register unit trackers. We do this once
+  // per function and then clear the register units each time we optimize a
+  // move.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Modified = false;
+  for (auto &MBB : Fn) {
+    Modified |= MovOpt(MBB);
+  }
+  return Modified;
+}
+
+/// createRISCVMoveOptimizationPass - returns an instance of the
+/// move optimization pass.
+FunctionPass *llvm::createRISCVMoveOptimizationPass() {
+  return new RISCVMoveOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
@@ -0,0 +1,176 @@
+//===------- RISCVPushPopOptimizer.cpp - RISCV Push/Pop opt. pass ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that modifies PUSH/POP instructions from the Zcmp
+// extension to use their non prolog/epilog related functionalities
+// and generates POPRET instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+#include <map>
+
+using namespace llvm;
+
+#define RISCV_PUSH_POP_OPT_NAME "RISC-V Zc Push/Pop optimization pass"
+
+namespace {
+struct RISCVPushPopOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVPushPopOpt() : MachineFunctionPass(ID) {
+    initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  const RISCVInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const RISCVSubtarget *Subtarget;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+  bool usePopRet(MachineBasicBlock::iterator &MBBI);
+  bool adjustRetVal(MachineBasicBlock::iterator &MBBI);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  // Return value that adjustRetVal folded into a POP; only 0 is recorded.
+  std::map<MachineInstr *, int> retValMap;
+
+  StringRef getPassName() const override { return RISCV_PUSH_POP_OPT_NAME; }
+};
+
+char RISCVPushPopOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVPushPopOpt, "riscv-push-pop-opt", RISCV_PUSH_POP_OPT_NAME,
+                false, false)
+
+// Check if POP instruction was inserted into the MBB and return iterator to it.
+static MachineBasicBlock::iterator containsPop(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end();
+       MBBI = next_nodbg(MBBI, MBB.end()))
+    if (MBBI->getOpcode() == RISCV::CM_POP)
+      return MBBI;
+
+  return MBB.end();
+}
+
+// Returns true if the next non-debug instruction after MBBI is a PseudoRET.
+static bool isFollowedByRet(MachineBasicBlock::iterator MBBI) {
+  MachineBasicBlock::iterator E = MBBI->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(MBBI, E);
+  // The POP can be the last instruction of the block; never dereference E.
+  return NextI != E && NextI->getOpcode() == RISCV::PseudoRET;
+}
+
+// Replace the POP at MBBI and the PseudoRET that directly follows it with a
+// single POPRET, or with POPRETZ when adjustRetVal recorded a zero return
+// value for this POP. Returns true if the pair was replaced.
+bool RISCVPushPopOpt::usePopRet(MachineBasicBlock::iterator &MBBI) {
+  // Since Pseudo instruction lowering happens later in the pipeline,
+  // this will detect all ret instructions.
+  if (!isFollowedByRet(MBBI))
+    return false;
+
+  MachineBasicBlock::iterator NextI =
+      next_nodbg(MBBI, MBBI->getParent()->end());
+  unsigned Opcode = RISCV::CM_POPRET;
+  auto retValInfo = retValMap.find(&(*MBBI));
+  if (retValInfo != retValMap.end()) {
+    // If the return value is not 0 then POPRETZ is not used.
+    if (retValInfo->second != 0)
+      return false;
+    Opcode = RISCV::CM_POPRETZ;
+  }
+
+  DebugLoc DL = NextI->getDebugLoc();
+  BuildMI(*NextI->getParent(), NextI, DL, TII->get(Opcode))
+      .add(MBBI->getOperand(0))
+      .add(MBBI->getOperand(1));
+  MBBI->eraseFromParent();
+  NextI->eraseFromParent();
+  return true;
+}
+
+// Search for last assignment to a0 and if possible use ret_val slot of POP to
+// store return value. On success the zero load is deleted, so the caller must
+// turn the POP into POPRETZ afterwards; only call this when that is possible.
+bool RISCVPushPopOpt::adjustRetVal(MachineBasicBlock::iterator &MBBI) {
+  MachineBasicBlock::reverse_iterator RE = MBBI->getParent()->rend();
+  // Track which register units have been modified and used between the POP
+  // insn and the last assignment to register a0.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+  retValMap.clear();
+  Register A0(RISCV::X10);
+  // Since POP instruction is in Epilogue no normal instructions will follow
+  // after it. Therefore search only previous ones to find the return value.
+  for (MachineBasicBlock::reverse_iterator I =
+           next_nodbg(MBBI.getReverse(), RE);
+       I != RE; I = next_nodbg(I, RE)) {
+    MachineInstr &MI = *I;
+    if (auto OperandPair = TII->isLoadImmImpl(MI)) {
+      Register DestReg = OperandPair->Destination->getReg();
+      int64_t RetVal = OperandPair->Source->getImm();
+      if (DestReg == RISCV::X10) {
+        // POPRETZ can only materialize 0; any other constant stays as is.
+        if (RetVal != 0)
+          return false;
+        retValMap[&(*MBBI)] = 0;
+        // Delete (not just unlink) the now redundant load immediate.
+        MI.eraseFromParent();
+        return true;
+      }
+    }
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+    // If a0 was modified or used, there is no possibility
+    // of using ret_val slot of POP instruction.
+    if (!ModifiedRegUnits.available(A0) || !UsedRegUnits.available(A0))
+      return false;
+  }
+  return false;
+}
+
+bool RISCVPushPopOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  // If Zcmp extension is not supported abort.
+  Subtarget = &static_cast<const RISCVSubtarget &>(Fn.getSubtarget());
+  if (!Subtarget->hasStdExtZcmp()) {
+    return false;
+  }
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Resize the modified and used register unit trackers. We do this once
+  // per function and then clear the register units each time we determine
+  // correct return value for the POP.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Modified = false;
+  for (auto &MBB : Fn) {
+    auto MBBI = containsPop(MBB);
+    // adjustRetVal deletes the zero load it folds into the POP, which is only
+    // correct if the POP then becomes POPRETZ. Skip blocks where the POP is
+    // not directly followed by the return.
+    if (MBBI == MBB.end() || !isFollowedByRet(MBBI))
+      continue;
+    Modified |= adjustRetVal(MBBI);
+    Modified |= usePopRet(MBBI);
+  }
+  return Modified;
+}
+
+/// createRISCVPushPopOptimizationPass - returns an instance of the
+/// Push/Pop optimization pass.
+FunctionPass *llvm::createRISCVPushPopOptimizationPass() {
+  return new RISCVPushPopOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -55,6 +55,8 @@
   initializeRISCVPreRAExpandPseudoPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
   initializeRISCVInsertVSETVLIPass(*PR);
+  initializeRISCVMoveOptPass(*PR);
+  initializeRISCVPushPopOptPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT) {
@@ -242,6 +244,10 @@
 }
 
 void RISCVPassConfig::addPreEmitPass2() {
+  addPass(createRISCVMoveOptimizationPass());
+  // Schedule PushPop Optimization before expansion of Pseudo instruction,
+  // ensuring return instruction is detected correctly.
+ addPass(createRISCVPushPopOptimizationPass()); addPass(createRISCVExpandPseudoPass()); // Schedule the expansion of AMOs at the last possible moment, avoiding the // possibility for other passes to break the requirements for forward diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -55,6 +55,8 @@ ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis +; CHECK-NEXT: RISC-V Zc move merging pass +; CHECK-NEXT: RISC-V Zc Push/Pop optimization pass ; CHECK-NEXT: RISCV pseudo instruction expansion pass ; CHECK-NEXT: RISCV atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -160,6 +160,8 @@ ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Outliner ; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: RISC-V Zc move merging pass +; CHECK-NEXT: RISC-V Zc Push/Pop optimization pass ; CHECK-NEXT: RISCV pseudo instruction expansion pass ; CHECK-NEXT: RISCV atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -42,6 +42,8 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zicbom %s -o - | FileCheck --check-prefix=RV32ZICBOM %s ; RUN: llc -mtriple=riscv32 -mattr=+zicboz %s -o - | FileCheck --check-prefix=RV32ZICBOZ %s ; RUN: llc -mtriple=riscv32 -mattr=+zicbop %s -o - | FileCheck --check-prefix=RV32ZICBOP %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp %s -o - | FileCheck --check-prefix=RV32ZCMP %s + ; RUN: llc -mtriple=riscv64 
-mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s ; RUN: llc -mtriple=riscv64 -mattr=+zmmul %s -o - | FileCheck --check-prefix=RV64ZMMUL %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+zmmul %s -o - | FileCheck --check-prefix=RV64MZMMUL %s @@ -84,6 +86,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zicbom %s -o - | FileCheck --check-prefix=RV64ZICBOM %s ; RUN: llc -mtriple=riscv64 -mattr=+zicboz %s -o - | FileCheck --check-prefix=RV64ZICBOZ %s ; RUN: llc -mtriple=riscv64 -mattr=+zicbop %s -o - | FileCheck --check-prefix=RV64ZICBOP %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp %s -o - | FileCheck --check-prefix=RV64ZCMP %s ; RV32M: .attribute 5, "rv32i2p0_m2p0" ; RV32ZMMUL: .attribute 5, "rv32i2p0_zmmul1p0" @@ -127,6 +130,7 @@ ; RV32ZICBOM: .attribute 5, "rv32i2p0_zicbom1p0" ; RV32ZICBOZ: .attribute 5, "rv32i2p0_zicboz1p0" ; RV32ZICBOP: .attribute 5, "rv32i2p0_zicbop1p0" +; RV32ZCMP: .attribute 5, "rv32i2p0_zca0p70_zcmp0p70" ; RV64M: .attribute 5, "rv64i2p0_m2p0" ; RV64ZMMUL: .attribute 5, "rv64i2p0_zmmul1p0" @@ -170,6 +174,7 @@ ; RV64ZICBOM: .attribute 5, "rv64i2p0_zicbom1p0" ; RV64ZICBOZ: .attribute 5, "rv64i2p0_zicboz1p0" ; RV64ZICBOP: .attribute 5, "rv64i2p0_zicbop1p0" +; RV64ZCMP: .attribute 5, "rv64i2p0_zca0p70_zcmp0p70" define i32 @addi(i32 %a) { %1 = add i32 %a, 1 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -9,6 +9,8 @@ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -frame-pointer=all < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I-WITH-FP +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IZCMP ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < 
%s \ @@ -19,6 +21,8 @@ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -frame-pointer=all < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I-WITH-FP +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64IZCMP @var = global [32 x i32] zeroinitializer @@ -249,6 +253,89 @@ ; RV32I-WITH-FP-NEXT: addi sp, sp, 80 ; RV32I-WITH-FP-NEXT: ret ; +; RV32IZCMP-LABEL: callee: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -80 +; RV32IZCMP-NEXT: lui a7, %hi(var) +; RV32IZCMP-NEXT: lw a0, %lo(var)(a7) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+4)(a7) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+8)(a7) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+12)(a7) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi a5, a7, %lo(var) +; RV32IZCMP-NEXT: lw a0, 16(a5) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(a5) +; RV32IZCMP-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw t0, 24(a5) +; RV32IZCMP-NEXT: lw t1, 28(a5) +; RV32IZCMP-NEXT: lw t2, 32(a5) +; RV32IZCMP-NEXT: lw t3, 36(a5) +; RV32IZCMP-NEXT: lw t4, 40(a5) +; RV32IZCMP-NEXT: lw t5, 44(a5) +; RV32IZCMP-NEXT: lw t6, 48(a5) +; RV32IZCMP-NEXT: lw s0, 52(a5) +; RV32IZCMP-NEXT: lw s1, 56(a5) +; RV32IZCMP-NEXT: lw s2, 60(a5) +; RV32IZCMP-NEXT: lw s3, 64(a5) +; RV32IZCMP-NEXT: lw s4, 68(a5) +; RV32IZCMP-NEXT: lw s5, 72(a5) +; RV32IZCMP-NEXT: lw s6, 76(a5) +; RV32IZCMP-NEXT: lw s7, 80(a5) +; RV32IZCMP-NEXT: lw s8, 84(a5) +; RV32IZCMP-NEXT: lw s9, 88(a5) +; RV32IZCMP-NEXT: lw s10, 92(a5) +; RV32IZCMP-NEXT: lw s11, 96(a5) +; RV32IZCMP-NEXT: lw ra, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a0, 124(a5) +; RV32IZCMP-NEXT: lw a1, 120(a5) +; RV32IZCMP-NEXT: 
lw a2, 116(a5) +; RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: sw a0, 124(a5) +; RV32IZCMP-NEXT: sw a1, 120(a5) +; RV32IZCMP-NEXT: sw a2, 116(a5) +; RV32IZCMP-NEXT: sw a3, 112(a5) +; RV32IZCMP-NEXT: sw a4, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) +; RV32IZCMP-NEXT: sw ra, 100(a5) +; RV32IZCMP-NEXT: sw s11, 96(a5) +; RV32IZCMP-NEXT: sw s10, 92(a5) +; RV32IZCMP-NEXT: sw s9, 88(a5) +; RV32IZCMP-NEXT: sw s8, 84(a5) +; RV32IZCMP-NEXT: sw s7, 80(a5) +; RV32IZCMP-NEXT: sw s6, 76(a5) +; RV32IZCMP-NEXT: sw s5, 72(a5) +; RV32IZCMP-NEXT: sw s4, 68(a5) +; RV32IZCMP-NEXT: sw s3, 64(a5) +; RV32IZCMP-NEXT: sw s2, 60(a5) +; RV32IZCMP-NEXT: sw s1, 56(a5) +; RV32IZCMP-NEXT: sw s0, 52(a5) +; RV32IZCMP-NEXT: sw t6, 48(a5) +; RV32IZCMP-NEXT: sw t5, 44(a5) +; RV32IZCMP-NEXT: sw t4, 40(a5) +; RV32IZCMP-NEXT: sw t3, 36(a5) +; RV32IZCMP-NEXT: sw t2, 32(a5) +; RV32IZCMP-NEXT: sw t1, 28(a5) +; RV32IZCMP-NEXT: sw t0, 24(a5) +; RV32IZCMP-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(a5) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(a5) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+12)(a7) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+8)(a7) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a7) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var)(a7) +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80 +; ; RV64I-LABEL: callee: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -160 @@ -471,6 +558,89 @@ ; RV64I-WITH-FP-NEXT: ld s11, 56(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: addi sp, sp, 160 ; RV64I-WITH-FP-NEXT: ret +; +; RV64IZCMP-LABEL: callee: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: lui a7, %hi(var) +; RV64IZCMP-NEXT: lw a0, %lo(var)(a7) +; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw 
a0, %lo(var+4)(a7) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+8)(a7) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+12)(a7) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi a5, a7, %lo(var) +; RV64IZCMP-NEXT: lw a0, 16(a5) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 20(a5) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw t0, 24(a5) +; RV64IZCMP-NEXT: lw t1, 28(a5) +; RV64IZCMP-NEXT: lw t2, 32(a5) +; RV64IZCMP-NEXT: lw t3, 36(a5) +; RV64IZCMP-NEXT: lw t4, 40(a5) +; RV64IZCMP-NEXT: lw t5, 44(a5) +; RV64IZCMP-NEXT: lw t6, 48(a5) +; RV64IZCMP-NEXT: lw s0, 52(a5) +; RV64IZCMP-NEXT: lw s1, 56(a5) +; RV64IZCMP-NEXT: lw s2, 60(a5) +; RV64IZCMP-NEXT: lw s3, 64(a5) +; RV64IZCMP-NEXT: lw s4, 68(a5) +; RV64IZCMP-NEXT: lw s5, 72(a5) +; RV64IZCMP-NEXT: lw s6, 76(a5) +; RV64IZCMP-NEXT: lw s7, 80(a5) +; RV64IZCMP-NEXT: lw s8, 84(a5) +; RV64IZCMP-NEXT: lw s9, 88(a5) +; RV64IZCMP-NEXT: lw s10, 92(a5) +; RV64IZCMP-NEXT: lw s11, 96(a5) +; RV64IZCMP-NEXT: lw ra, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a0, 124(a5) +; RV64IZCMP-NEXT: lw a1, 120(a5) +; RV64IZCMP-NEXT: lw a2, 116(a5) +; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: sw a0, 124(a5) +; RV64IZCMP-NEXT: sw a1, 120(a5) +; RV64IZCMP-NEXT: sw a2, 116(a5) +; RV64IZCMP-NEXT: sw a3, 112(a5) +; RV64IZCMP-NEXT: sw a4, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw ra, 100(a5) +; RV64IZCMP-NEXT: sw s11, 96(a5) +; RV64IZCMP-NEXT: sw s10, 92(a5) +; RV64IZCMP-NEXT: sw s9, 88(a5) +; RV64IZCMP-NEXT: sw s8, 84(a5) +; RV64IZCMP-NEXT: sw s7, 80(a5) +; RV64IZCMP-NEXT: sw s6, 76(a5) +; RV64IZCMP-NEXT: sw s5, 72(a5) +; RV64IZCMP-NEXT: sw s4, 68(a5) +; RV64IZCMP-NEXT: sw s3, 64(a5) +; RV64IZCMP-NEXT: sw s2, 60(a5) +; RV64IZCMP-NEXT: sw s1, 56(a5) +; RV64IZCMP-NEXT: sw s0, 52(a5) +; 
RV64IZCMP-NEXT: sw t6, 48(a5) +; RV64IZCMP-NEXT: sw t5, 44(a5) +; RV64IZCMP-NEXT: sw t4, 40(a5) +; RV64IZCMP-NEXT: sw t3, 36(a5) +; RV64IZCMP-NEXT: sw t2, 32(a5) +; RV64IZCMP-NEXT: sw t1, 28(a5) +; RV64IZCMP-NEXT: sw t0, 24(a5) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(a5) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(a5) +; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+12)(a7) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+8)(a7) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+4)(a7) +; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var)(a7) +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 %val = load [32 x i32], [32 x i32]* @var store volatile [32 x i32] %val, [32 x i32]* @var ret void @@ -769,6 +939,124 @@ ; RV32I-WITH-FP-NEXT: addi sp, sp, 144 ; RV32I-WITH-FP-NEXT: ret ; +; RV32IZCMP-LABEL: caller: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -112 +; RV32IZCMP-NEXT: addi sp, sp, -32 +; RV32IZCMP-NEXT: lui s0, %hi(var) +; RV32IZCMP-NEXT: lw a0, %lo(var)(s0) +; RV32IZCMP-NEXT: sw a0, 88(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+4)(s0) +; RV32IZCMP-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+8)(s0) +; RV32IZCMP-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+12)(s0) +; RV32IZCMP-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi s5, s0, %lo(var) +; RV32IZCMP-NEXT: lw a0, 16(s5) +; RV32IZCMP-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(s5) +; RV32IZCMP-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 24(s5) +; RV32IZCMP-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 28(s5) +; RV32IZCMP-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 
32(s5) +; RV32IZCMP-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 36(s5) +; RV32IZCMP-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 40(s5) +; RV32IZCMP-NEXT: sw a0, 48(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 44(s5) +; RV32IZCMP-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 48(s5) +; RV32IZCMP-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 52(s5) +; RV32IZCMP-NEXT: sw a0, 36(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 56(s5) +; RV32IZCMP-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 60(s5) +; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 64(s5) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 68(s5) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 72(s5) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 76(s5) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 80(s5) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 84(s5) +; RV32IZCMP-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw s3, 88(s5) +; RV32IZCMP-NEXT: lw s4, 92(s5) +; RV32IZCMP-NEXT: lw s6, 96(s5) +; RV32IZCMP-NEXT: lw s7, 100(s5) +; RV32IZCMP-NEXT: lw s8, 104(s5) +; RV32IZCMP-NEXT: lw s9, 108(s5) +; RV32IZCMP-NEXT: lw s10, 112(s5) +; RV32IZCMP-NEXT: lw s11, 116(s5) +; RV32IZCMP-NEXT: lw s1, 120(s5) +; RV32IZCMP-NEXT: lw s2, 124(s5) +; RV32IZCMP-NEXT: call callee@plt +; RV32IZCMP-NEXT: sw s2, 124(s5) +; RV32IZCMP-NEXT: sw s1, 120(s5) +; RV32IZCMP-NEXT: sw s11, 116(s5) +; RV32IZCMP-NEXT: sw s10, 112(s5) +; RV32IZCMP-NEXT: sw s9, 108(s5) +; RV32IZCMP-NEXT: sw s8, 104(s5) +; RV32IZCMP-NEXT: sw s7, 100(s5) +; RV32IZCMP-NEXT: sw s6, 96(s5) +; RV32IZCMP-NEXT: sw s4, 92(s5) +; RV32IZCMP-NEXT: sw s3, 88(s5) +; RV32IZCMP-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 
84(s5) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 80(s5) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 76(s5) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 72(s5) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 68(s5) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 64(s5) +; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 60(s5) +; RV32IZCMP-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 56(s5) +; RV32IZCMP-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 52(s5) +; RV32IZCMP-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 48(s5) +; RV32IZCMP-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 44(s5) +; RV32IZCMP-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 40(s5) +; RV32IZCMP-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 36(s5) +; RV32IZCMP-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 32(s5) +; RV32IZCMP-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 28(s5) +; RV32IZCMP-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 24(s5) +; RV32IZCMP-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(s5) +; RV32IZCMP-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(s5) +; RV32IZCMP-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+12)(s0) +; RV32IZCMP-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+8)(s0) +; RV32IZCMP-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+4)(s0) +; RV32IZCMP-NEXT: lw a0, 88(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var)(s0) +; RV32IZCMP-NEXT: addi sp, sp, 32 +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 112 +; ; 
RV64I-LABEL: caller: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -288 @@ -1057,7 +1345,124 @@ ; RV64I-WITH-FP-NEXT: ld s11, 184(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: addi sp, sp, 288 ; RV64I-WITH-FP-NEXT: ret - +; +; RV64IZCMP-LABEL: caller: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: addi sp, sp, -128 +; RV64IZCMP-NEXT: lui s0, %hi(var) +; RV64IZCMP-NEXT: lw a0, %lo(var)(s0) +; RV64IZCMP-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+4)(s0) +; RV64IZCMP-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+8)(s0) +; RV64IZCMP-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+12)(s0) +; RV64IZCMP-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi s5, s0, %lo(var) +; RV64IZCMP-NEXT: lw a0, 16(s5) +; RV64IZCMP-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 20(s5) +; RV64IZCMP-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 24(s5) +; RV64IZCMP-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 28(s5) +; RV64IZCMP-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 32(s5) +; RV64IZCMP-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 36(s5) +; RV64IZCMP-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 40(s5) +; RV64IZCMP-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 44(s5) +; RV64IZCMP-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 48(s5) +; RV64IZCMP-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 52(s5) +; RV64IZCMP-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 56(s5) +; RV64IZCMP-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 60(s5) +; RV64IZCMP-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 64(s5) +; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 
68(s5) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 72(s5) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 76(s5) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 80(s5) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 84(s5) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw s3, 88(s5) +; RV64IZCMP-NEXT: lw s4, 92(s5) +; RV64IZCMP-NEXT: lw s6, 96(s5) +; RV64IZCMP-NEXT: lw s7, 100(s5) +; RV64IZCMP-NEXT: lw s8, 104(s5) +; RV64IZCMP-NEXT: lw s9, 108(s5) +; RV64IZCMP-NEXT: lw s10, 112(s5) +; RV64IZCMP-NEXT: lw s11, 116(s5) +; RV64IZCMP-NEXT: lw s1, 120(s5) +; RV64IZCMP-NEXT: lw s2, 124(s5) +; RV64IZCMP-NEXT: call callee@plt +; RV64IZCMP-NEXT: sw s2, 124(s5) +; RV64IZCMP-NEXT: sw s1, 120(s5) +; RV64IZCMP-NEXT: sw s11, 116(s5) +; RV64IZCMP-NEXT: sw s10, 112(s5) +; RV64IZCMP-NEXT: sw s9, 108(s5) +; RV64IZCMP-NEXT: sw s8, 104(s5) +; RV64IZCMP-NEXT: sw s7, 100(s5) +; RV64IZCMP-NEXT: sw s6, 96(s5) +; RV64IZCMP-NEXT: sw s4, 92(s5) +; RV64IZCMP-NEXT: sw s3, 88(s5) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 84(s5) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 80(s5) +; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 76(s5) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 72(s5) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 68(s5) +; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 64(s5) +; RV64IZCMP-NEXT: ld a0, 56(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 60(s5) +; RV64IZCMP-NEXT: ld a0, 64(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 56(s5) +; RV64IZCMP-NEXT: ld a0, 72(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 52(s5) +; RV64IZCMP-NEXT: ld a0, 80(sp) # 8-byte Folded Reload +; 
RV64IZCMP-NEXT: sw a0, 48(s5) +; RV64IZCMP-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 44(s5) +; RV64IZCMP-NEXT: ld a0, 96(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 40(s5) +; RV64IZCMP-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 36(s5) +; RV64IZCMP-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 32(s5) +; RV64IZCMP-NEXT: ld a0, 120(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 28(s5) +; RV64IZCMP-NEXT: ld a0, 128(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 24(s5) +; RV64IZCMP-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(s5) +; RV64IZCMP-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(s5) +; RV64IZCMP-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+12)(s0) +; RV64IZCMP-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+8)(s0) +; RV64IZCMP-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+4)(s0) +; RV64IZCMP-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var)(s0) +; RV64IZCMP-NEXT: addi sp, sp, 128 +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 %val = load [32 x i32], [32 x i32]* @var call void @callee() store volatile [32 x i32] %val, [32 x i32]* @var diff --git a/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll @@ -0,0 +1,163 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=VALID,VALID32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=VALID,VALID64 + + +; Function Attrs: nounwind +define dso_local i32 @cmva(i32 %num, i32 %f, i32 %d, i32 %dx) local_unnamed_addr #0 { +; VALID-LABEL: cmva: +; VALID: cm.mva01s {{s[0-7]}}, {{s[0-7]}} +; VALID-NOT: cm.mva01s 
{{a.}}, {{a.}} +entry: + %mul = mul nsw i32 %dx, %d + %sub = sub nsw i32 %mul, %dx + %add = add nsw i32 %mul, %d + %mul2 = mul nsw i32 %sub, %dx + %add3 = add nsw i32 %add, %mul2 + %mul4 = mul nsw i32 %add3, %d + %add6 = add nsw i32 %add3, %num + %add5 = add i32 %sub, %f + %add7 = add i32 %add5, %mul4 + ret i32 %add7 +} + +declare i64 @llvm.cttz.i64(i64, i1 immarg) + +define i64 @cmvs32(i64 %a) nounwind { +; VALID32-LABEL: cmvs32: +; VALID32: cm.mvsa01 {{s[0-7]}}, {{s[0-7]}} +; VALID32-NOT: cm.mvsa01 {{a.}}, {{a.}} + %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %tmp +} + +declare void @hoge() +define void @cmvs64(i32 signext %arg, i32 signext %arg1) nounwind { +; VALID64-LABEL: cmvs64: +; VALID64: cm.mvsa01 {{s[0-7]}}, {{s[0-7]}} +; VALID64-NOT: cm.mvsa01 {{a.}}, {{a.}} +bb: + %tmp = icmp eq i32 %arg, %arg1 + br i1 %tmp, label %bb6, label %bb2 + +bb2: ; preds = %bb2, %bb + %tmp3 = phi i32 [ %tmp4, %bb2 ], [ %arg, %bb ] + tail call void @hoge() + %tmp4 = add nsw i32 %tmp3, 1 + %tmp5 = icmp eq i32 %tmp4, %arg1 + br i1 %tmp5, label %bb6, label %bb2 + +bb6: ; preds = %bb2, %bb + ret void +} + +%struct.trie = type { [26 x %struct.trie*], i8 } + +@word = external global i8* + +declare i32 @trie_new(%struct.trie*) +declare i32 @trie_search(i8*, i32, %struct.trie**) +declare i64 @strnlen(i8*, i64) + +; Function Attrs: nounwind optnone +define i32 @mvas_2() { + ; VALID64-LABEL: mvas_2: + ; VALID64-NOT: cm.mva01s {{a.}}, {{s.}} + ; VALID64-NOT: cm.mva01s {{s.}}, {{a.}} +entry: + %trie = alloca %struct.trie* + %0 = bitcast %struct.trie** %trie to i8* + store %struct.trie* null, %struct.trie** %trie + %call = tail call i32 @trie_new(%struct.trie* null) + %1 = load i8*, i8** @word + %call1 = tail call i64 @strnlen(i8* %1, i64 100) + %conv = trunc i64 %call1 to i32 + %call2 = call i32 @trie_search(i8* %1, i32 %conv, %struct.trie** %trie) + ret i32 %call2 +} + +declare i32 @foo(i32, i32) + +; Function Attrs: nounwind optnone +define dso_local i32 @cm_mvas_same_src(i32 
%0, i32 %1, i32 %2, i32 %3) { + ; VALID32-LABEL: cm_mvas_same_src: + ; VALID32: cm.mva01s s0, s0 + ; + ; VALID64-LABEL: cm_mvas_same_src: + ; VALID64: cm.mva01s s0, s0 +entry: + %4 = call i32 @foo(i32 %3, i32 %2) + %5 = add i32 %4, %2 + %6 = call i32 @foo(i32 %3, i32 %3) + %add = add i32 %5, %6 + ret i32 %add +} + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } +%struct.Node = type { i8*, i64, %struct.Node*, %struct.Node* } + +declare i8* @malloc(i64) + +declare i32 @fgetc(%struct._IO_FILE*nocapture) + +declare %struct.Node* @addWordToTree(i8*, %struct.Node*) + +; Function Attrs: nounwind optsize +define %struct.Node* @cmmv_a1_come_first(%struct._IO_FILE*nocapture %file, %struct.Node* %root) { + ; VALID32-LABEL: cmmv_a1_come_first: + ; VALID32: cm.push {ra, s0-s5}, -32 + ; VALID32-NEXT: .cfi_offset ra, -4 + ; VALID32-NEXT: .cfi_offset s0, -8 + ; VALID32-NEXT: .cfi_offset s1, -12 + ; VALID32-NEXT: .cfi_offset s2, -16 + ; VALID32-NEXT: .cfi_offset s3, -20 + ; VALID32-NEXT: .cfi_offset s4, -24 + ; VALID32-NEXT: .cfi_offset s5, -28 + ; VALID32-NEXT: cm.mvsa01 s1, s0 + + ; VALID64-LABEL: cmmv_a1_come_first: + ; VALID64: cm.push {ra, s0-s5}, -64 + ; VALID64-NEXT: .cfi_offset ra, -8 + ; VALID64-NEXT: .cfi_offset s0, -16 + ; VALID64-NEXT: .cfi_offset s1, -24 + ; VALID64-NEXT: .cfi_offset s2, -32 + ; VALID64-NEXT: .cfi_offset s3, -40 + ; VALID64-NEXT: .cfi_offset s4, -48 + ; VALID64-NEXT: .cfi_offset s5, -56 + ; VALID64-NEXT: cm.mvsa01 s2, s0 +entry: + %call = tail call dereferenceable_or_null(46) i8* @malloc(i64 46) + %arrayidx = getelementptr inbounds i8, i8* %call, i64 -1 + %call117 = tail call i32 @fgetc(%struct._IO_FILE* %file) + %sext.mask18 = and i32 %call117, 255 + %cmp.not19 = icmp eq i32 %sext.mask18, 255 + br i1 %cmp.not19, 
label %while.end, label %land.lhs.true.preheader + +land.lhs.true.preheader: + %arrayidx921 = getelementptr inbounds i8, i8* %call, i64 255 + store i8 0, i8* %arrayidx921 + %call1022 = tail call %struct.Node* @addWordToTree(i8* %call, %struct.Node* %root) + %call123 = tail call i32 @fgetc(%struct._IO_FILE* %file) + %sext.mask24 = and i32 %call123, 255 + %cmp.not25 = icmp eq i32 %sext.mask24, 255 + br i1 %cmp.not25, label %while.end, label %land.lhs.true.land.lhs.true_crit_edge + +land.lhs.true.land.lhs.true_crit_edge: + %call1026 = phi %struct.Node* [ %call10, %land.lhs.true.land.lhs.true_crit_edge ], [ %call1022, %land.lhs.true.preheader ] + %.pre = load i8, i8* %arrayidx + %cmp6.not = icmp eq i8 %.pre, 39 + %spec.select = select i1 %cmp6.not, i64 0, i64 255 + %arrayidx9 = getelementptr inbounds i8, i8* %call, i64 %spec.select + store i8 0, i8* %arrayidx9 + %call10 = tail call %struct.Node* @addWordToTree(i8* %call, %struct.Node* %call1026) + %call1 = tail call i32 @fgetc(%struct._IO_FILE* %file) + %sext.mask = and i32 %call1, 255 + %cmp.not = icmp eq i32 %sext.mask, 255 + br i1 %cmp.not, label %while.end, label %land.lhs.true.land.lhs.true_crit_edge + +while.end: + %root.addr.0.lcssa = phi %struct.Node* [ %root, %entry ], [ %call1022, %land.lhs.true.preheader ], [ %call10, %land.lhs.true.land.lhs.true_crit_edge ] + ret %struct.Node* %root.addr.0.lcssa +} diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll @@ -0,0 +1,1815 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update +; NOTE: Check cm.push/cm.pop. 
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IZCMP +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64IZCMP +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I %s + +declare void @test(i8*) + +; Function Attrs: optnone +define i32 @foo() { +; RV32IZCMP-LABEL: foo: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra}, -64 +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 528 +; RV32IZCMP-NEXT: addi sp, sp, -464 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: addi a0, sp, 12 +; RV32IZCMP-NEXT: call test@plt +; RV32IZCMP-NEXT: addi sp, sp, 464 +; RV32IZCMP-NEXT: cm.popretz {ra}, 64 +; +; RV64IZCMP-LABEL: foo: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra}, -64 +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 528 +; RV64IZCMP-NEXT: addi sp, sp, -464 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: addi a0, sp, 8 +; RV64IZCMP-NEXT: call test@plt +; RV64IZCMP-NEXT: addi sp, sp, 464 +; RV64IZCMP-NEXT: cm.popretz {ra}, 64 +; +; RV32I-LABEL: foo: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -528 +; RV32I-NEXT: .cfi_def_cfa_offset 528 +; RV32I-NEXT: sw ra, 524(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a0, sp, 12 +; RV32I-NEXT: call test@plt +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: lw ra, 524(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 528 +; RV32I-NEXT: ret +; +; RV64I-LABEL: foo: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -528 +; RV64I-NEXT: .cfi_def_cfa_offset 528 +; RV64I-NEXT: sd ra, 520(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: call test@plt +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: ld ra, 520(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 
528 +; RV64I-NEXT: ret + %1 = alloca [512 x i8] + %2 = getelementptr [512 x i8], [512 x i8]* %1, i32 0, i32 0 + call void @test(i8* %2) + ret i32 0 +} + +define dso_local i32 @pushpopret0(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopret0: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popretz {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopret0: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popretz {ra, s0}, 16 +; +; RV32I-LABEL: pushpopret0: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw 
ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopret0: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 0 +} + +define dso_local i32 @pushpopret1(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopret1: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: li a0, 1 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopret1: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; 
RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: li a0, 1 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV32I-LABEL: pushpopret1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopret1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 1 +} + +define dso_local i32 
@pushpopretneg1(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopretneg1: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: li a0, -1 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopretneg1: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: li a0, -1 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV32I-LABEL: pushpopretneg1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, -1 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, 
sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopretneg1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, -1 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 -1 +} + +define dso_local i32 @pushpopret2(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopret2: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: li a0, 2 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopret2: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; 
RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: li a0, 2 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV32I-LABEL: pushpopret2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, 2 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopret2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, 2 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 2 +} + +define dso_local i32 @tailcall(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: tailcall: +; RV32IZCMP: # %bb.0: # %entry 
+; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.pop {ra, s0}, 16 +; RV32IZCMP-NEXT: tail callee@plt +; +; RV64IZCMP-LABEL: tailcall: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.pop {ra, s0}, 16 +; RV64IZCMP-NEXT: tail callee@plt +; +; RV32I-LABEL: tailcall: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail callee@plt +; +; RV64I-LABEL: tailcall: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; 
RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: tail callee@plt +entry: + %0 = alloca i8, i32 %size, align 16 + %1 = tail call i32 @callee(i8* nonnull %0) + ret i32 %1 +} + +@var = global [5 x i32] zeroinitializer +define dso_local i32 @nocompress(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: nocompress: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 48 +; RV32IZCMP-NEXT: cm.push {ra, s0-s8}, -48 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: .cfi_offset s1, -12 +; RV32IZCMP-NEXT: .cfi_offset s2, -16 +; RV32IZCMP-NEXT: .cfi_offset s3, -20 +; RV32IZCMP-NEXT: .cfi_offset s4, -24 +; RV32IZCMP-NEXT: .cfi_offset s5, -28 +; RV32IZCMP-NEXT: .cfi_offset s6, -32 +; RV32IZCMP-NEXT: .cfi_offset s7, -36 +; RV32IZCMP-NEXT: .cfi_offset s8, -40 +; RV32IZCMP-NEXT: addi s0, sp, 48 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub s1, sp, a0 +; RV32IZCMP-NEXT: mv sp, s1 +; RV32IZCMP-NEXT: lui s2, %hi(var) +; RV32IZCMP-NEXT: lw s3, %lo(var)(s2) +; RV32IZCMP-NEXT: lw s4, %lo(var+4)(s2) +; RV32IZCMP-NEXT: lw s5, %lo(var+8)(s2) +; RV32IZCMP-NEXT: lw s6, %lo(var+12)(s2) +; RV32IZCMP-NEXT: addi s7, s2, %lo(var) +; RV32IZCMP-NEXT: lw s8, 16(s7) +; RV32IZCMP-NEXT: mv a0, s1 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: sw s8, 16(s7) +; RV32IZCMP-NEXT: sw s6, %lo(var+12)(s2) +; RV32IZCMP-NEXT: sw s5, %lo(var+8)(s2) +; RV32IZCMP-NEXT: sw s4, %lo(var+4)(s2) +; RV32IZCMP-NEXT: sw 
s3, %lo(var)(s2) +; RV32IZCMP-NEXT: mv a0, s1 +; RV32IZCMP-NEXT: addi sp, s0, -48 +; RV32IZCMP-NEXT: cm.pop {ra, s0-s8}, 48 +; RV32IZCMP-NEXT: tail callee@plt +; +; RV64IZCMP-LABEL: nocompress: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 80 +; RV64IZCMP-NEXT: cm.push {ra, s0-s8}, -80 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: .cfi_offset s1, -24 +; RV64IZCMP-NEXT: .cfi_offset s2, -32 +; RV64IZCMP-NEXT: .cfi_offset s3, -40 +; RV64IZCMP-NEXT: .cfi_offset s4, -48 +; RV64IZCMP-NEXT: .cfi_offset s5, -56 +; RV64IZCMP-NEXT: .cfi_offset s6, -64 +; RV64IZCMP-NEXT: .cfi_offset s7, -72 +; RV64IZCMP-NEXT: .cfi_offset s8, -80 +; RV64IZCMP-NEXT: addi s0, sp, 80 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub s1, sp, a0 +; RV64IZCMP-NEXT: mv sp, s1 +; RV64IZCMP-NEXT: lui s2, %hi(var) +; RV64IZCMP-NEXT: lw s3, %lo(var)(s2) +; RV64IZCMP-NEXT: lw s4, %lo(var+4)(s2) +; RV64IZCMP-NEXT: lw s5, %lo(var+8)(s2) +; RV64IZCMP-NEXT: lw s6, %lo(var+12)(s2) +; RV64IZCMP-NEXT: addi s7, s2, %lo(var) +; RV64IZCMP-NEXT: lw s8, 16(s7) +; RV64IZCMP-NEXT: mv a0, s1 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: sw s8, 16(s7) +; RV64IZCMP-NEXT: sw s6, %lo(var+12)(s2) +; RV64IZCMP-NEXT: sw s5, %lo(var+8)(s2) +; RV64IZCMP-NEXT: sw s4, %lo(var+4)(s2) +; RV64IZCMP-NEXT: sw s3, %lo(var)(s2) +; RV64IZCMP-NEXT: mv a0, s1 +; RV64IZCMP-NEXT: addi sp, s0, -80 +; RV64IZCMP-NEXT: cm.pop {ra, s0-s8}, 80 +; RV64IZCMP-NEXT: tail callee@plt +; +; RV32I-LABEL: nocompress: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; 
RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: .cfi_offset s6, -32 +; RV32I-NEXT: .cfi_offset s7, -36 +; RV32I-NEXT: .cfi_offset s8, -40 +; RV32I-NEXT: addi s0, sp, 48 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub s1, sp, a0 +; RV32I-NEXT: mv sp, s1 +; RV32I-NEXT: lui s2, %hi(var) +; RV32I-NEXT: lw s3, %lo(var)(s2) +; RV32I-NEXT: lw s4, %lo(var+4)(s2) +; RV32I-NEXT: lw s5, %lo(var+8)(s2) +; RV32I-NEXT: lw s6, %lo(var+12)(s2) +; RV32I-NEXT: addi s7, s2, %lo(var) +; RV32I-NEXT: lw s8, 16(s7) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: sw s8, 16(s7) +; RV32I-NEXT: sw s6, %lo(var+12)(s2) +; RV32I-NEXT: sw s5, %lo(var+8)(s2) +; RV32I-NEXT: sw s4, %lo(var+4)(s2) +; RV32I-NEXT: sw s3, %lo(var)(s2) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: addi sp, s0, -48 +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: tail callee@plt +; +; RV64I-LABEL: nocompress: +; RV64I: # 
%bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -80 +; RV64I-NEXT: .cfi_def_cfa_offset 80 +; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: .cfi_offset s4, -48 +; RV64I-NEXT: .cfi_offset s5, -56 +; RV64I-NEXT: .cfi_offset s6, -64 +; RV64I-NEXT: .cfi_offset s7, -72 +; RV64I-NEXT: .cfi_offset s8, -80 +; RV64I-NEXT: addi s0, sp, 80 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub s1, sp, a0 +; RV64I-NEXT: mv sp, s1 +; RV64I-NEXT: lui s2, %hi(var) +; RV64I-NEXT: lw s3, %lo(var)(s2) +; RV64I-NEXT: lw s4, %lo(var+4)(s2) +; RV64I-NEXT: lw s5, %lo(var+8)(s2) +; RV64I-NEXT: lw s6, %lo(var+12)(s2) +; RV64I-NEXT: addi s7, s2, %lo(var) +; RV64I-NEXT: lw s8, 16(s7) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: sw s8, 16(s7) +; RV64I-NEXT: sw s6, %lo(var+12)(s2) +; RV64I-NEXT: sw s5, %lo(var+8)(s2) +; RV64I-NEXT: sw s4, %lo(var+4)(s2) +; RV64I-NEXT: sw s3, %lo(var)(s2) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: addi sp, s0, -80 +; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; 
RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 80 +; RV64I-NEXT: tail callee@plt +entry: + %0 = alloca i8, i32 %size, align 16 + %val = load [5 x i32], [5 x i32]* @var + call void @callee_void(i8* nonnull %0) + store volatile [5 x i32] %val, [5 x i32]* @var + %1 = tail call i32 @callee(i8* nonnull %0) + ret i32 %1 +} + +declare void @callee_void(i8*) +declare i32 @callee(i8*) + +declare i32 @foo_test_irq(...) +@var_test_irq = global [32 x i32] zeroinitializer + +define void @foo_with_irq() nounwind "interrupt"="user" { +; RV32IZCMP-LABEL: foo_with_irq: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra}, -64 +; RV32IZCMP-NEXT: sw t0, 56(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t1, 52(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t2, 48(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a6, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a7, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: call foo_test_irq@plt +; RV32IZCMP-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a7, 16(sp) # 4-byte Folded Reload +; 
RV32IZCMP-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a5, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a4, 28(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a3, 32(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a2, 36(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a1, 40(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t2, 48(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t1, 52(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t0, 56(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: cm.pop {ra}, 64 +; RV32IZCMP-NEXT: uret +; +; RV64IZCMP-LABEL: foo_with_irq: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra}, -64 +; RV64IZCMP-NEXT: addi sp, sp, -64 +; RV64IZCMP-NEXT: sd t0, 116(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t1, 108(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t2, 100(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a0, 92(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a1, 84(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a2, 76(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a3, 68(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a4, 60(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a5, 52(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a6, 44(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a7, 36(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t3, 28(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t4, 20(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t5, 12(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t6, 4(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: call foo_test_irq@plt +; RV64IZCMP-NEXT: ld t6, 4(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t5, 12(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t4, 20(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t3, 28(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a7, 36(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a6, 44(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a5, 52(sp) # 8-byte 
Folded Reload +; RV64IZCMP-NEXT: ld a4, 60(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a3, 68(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a2, 76(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a1, 84(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a0, 92(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t2, 100(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t1, 108(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t0, 116(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: addi sp, sp, 64 +; RV64IZCMP-NEXT: cm.pop {ra}, 64 +; RV64IZCMP-NEXT: uret +; +; RV32I-LABEL: foo_with_irq: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t1, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t2, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a6, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a7, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: call foo_test_irq@plt +; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t2, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a2, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a4, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a5, 24(sp) # 4-byte 
Folded Reload +; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a7, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: uret +; +; RV64I-LABEL: foo_with_irq: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -128 +; RV64I-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t0, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t1, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t2, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a2, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a3, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a4, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a5, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a6, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a7, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t3, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t4, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t5, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t6, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: call foo_test_irq@plt +; RV64I-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t0, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t1, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t2, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a2, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a3, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a4, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a5, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a6, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a7, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t3, 24(sp) # 8-byte Folded Reload +; 
RV64I-NEXT: ld t4, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t5, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t6, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 128 +; RV64I-NEXT: uret + %call = call i32 bitcast (i32 (...)* @foo_test_irq to i32 ()*)() + ret void +} + +define void @foo_no_irq() nounwind{ +; RV32IZCMP-LABEL: foo_no_irq: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra}, -16 +; RV32IZCMP-NEXT: call foo_test_irq@plt +; RV32IZCMP-NEXT: cm.popret {ra}, 16 +; +; RV64IZCMP-LABEL: foo_no_irq: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra}, -16 +; RV64IZCMP-NEXT: call foo_test_irq@plt +; RV64IZCMP-NEXT: cm.popret {ra}, 16 +; +; RV32I-LABEL: foo_no_irq: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call foo_test_irq@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: foo_no_irq: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call foo_test_irq@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %call = call i32 bitcast (i32 (...)* @foo_test_irq to i32 ()*)() + ret void +} + +define void @callee_with_irq() nounwind "interrupt"="user" { +; RV32IZCMP-LABEL: callee_with_irq: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -112 +; RV32IZCMP-NEXT: addi sp, sp, -32 +; RV32IZCMP-NEXT: sw t0, 88(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t1, 84(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t2, 80(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a1, 72(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a2, 68(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a3, 64(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a4, 60(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a5, 56(sp) # 4-byte Folded Spill +; 
RV32IZCMP-NEXT: sw a6, 52(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a7, 48(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t3, 44(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t4, 40(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t5, 36(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t6, 32(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lui a7, %hi(var_test_irq) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32IZCMP-NEXT: lw a0, 16(a5) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(a5) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw t0, 24(a5) +; RV32IZCMP-NEXT: lw t1, 28(a5) +; RV32IZCMP-NEXT: lw t2, 32(a5) +; RV32IZCMP-NEXT: lw t3, 36(a5) +; RV32IZCMP-NEXT: lw t4, 40(a5) +; RV32IZCMP-NEXT: lw t5, 44(a5) +; RV32IZCMP-NEXT: lw t6, 48(a5) +; RV32IZCMP-NEXT: lw s0, 52(a5) +; RV32IZCMP-NEXT: lw s1, 56(a5) +; RV32IZCMP-NEXT: lw s2, 60(a5) +; RV32IZCMP-NEXT: lw s3, 64(a5) +; RV32IZCMP-NEXT: lw s4, 68(a5) +; RV32IZCMP-NEXT: lw s5, 72(a5) +; RV32IZCMP-NEXT: lw s6, 76(a5) +; RV32IZCMP-NEXT: lw s7, 80(a5) +; RV32IZCMP-NEXT: lw s8, 84(a5) +; RV32IZCMP-NEXT: lw s9, 88(a5) +; RV32IZCMP-NEXT: lw s10, 92(a5) +; RV32IZCMP-NEXT: lw s11, 96(a5) +; RV32IZCMP-NEXT: lw ra, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a0, 124(a5) +; RV32IZCMP-NEXT: lw a1, 120(a5) +; RV32IZCMP-NEXT: lw a2, 116(a5) +; RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: sw a0, 124(a5) +; RV32IZCMP-NEXT: sw a1, 120(a5) +; RV32IZCMP-NEXT: sw a2, 116(a5) +; 
RV32IZCMP-NEXT: sw a3, 112(a5) +; RV32IZCMP-NEXT: sw a4, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) +; RV32IZCMP-NEXT: sw ra, 100(a5) +; RV32IZCMP-NEXT: sw s11, 96(a5) +; RV32IZCMP-NEXT: sw s10, 92(a5) +; RV32IZCMP-NEXT: sw s9, 88(a5) +; RV32IZCMP-NEXT: sw s8, 84(a5) +; RV32IZCMP-NEXT: sw s7, 80(a5) +; RV32IZCMP-NEXT: sw s6, 76(a5) +; RV32IZCMP-NEXT: sw s5, 72(a5) +; RV32IZCMP-NEXT: sw s4, 68(a5) +; RV32IZCMP-NEXT: sw s3, 64(a5) +; RV32IZCMP-NEXT: sw s2, 60(a5) +; RV32IZCMP-NEXT: sw s1, 56(a5) +; RV32IZCMP-NEXT: sw s0, 52(a5) +; RV32IZCMP-NEXT: sw t6, 48(a5) +; RV32IZCMP-NEXT: sw t5, 44(a5) +; RV32IZCMP-NEXT: sw t4, 40(a5) +; RV32IZCMP-NEXT: sw t3, 36(a5) +; RV32IZCMP-NEXT: sw t2, 32(a5) +; RV32IZCMP-NEXT: sw t1, 28(a5) +; RV32IZCMP-NEXT: sw t0, 24(a5) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(a5) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(a5) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32IZCMP-NEXT: lw t6, 32(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t5, 36(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t4, 40(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t3, 44(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a7, 48(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a6, 52(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a5, 56(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a4, 60(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a3, 64(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a2, 68(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw 
a0, 76(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t2, 80(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t1, 84(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t0, 88(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: addi sp, sp, 32 +; RV32IZCMP-NEXT: cm.pop {ra, s0-s11}, 112 +; RV32IZCMP-NEXT: uret +; +; RV64IZCMP-LABEL: callee_with_irq: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: addi sp, sp, -112 +; RV64IZCMP-NEXT: sd t0, 212(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t1, 204(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t2, 196(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a0, 188(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a1, 180(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a2, 172(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a3, 164(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a4, 156(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a5, 148(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a6, 140(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a7, 132(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t3, 124(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t4, 116(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t5, 108(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t6, 100(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lui a7, %hi(var_test_irq) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64IZCMP-NEXT: lw a0, 16(a5) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 20(a5) +; RV64IZCMP-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: 
lw t0, 24(a5) +; RV64IZCMP-NEXT: lw t1, 28(a5) +; RV64IZCMP-NEXT: lw t2, 32(a5) +; RV64IZCMP-NEXT: lw t3, 36(a5) +; RV64IZCMP-NEXT: lw t4, 40(a5) +; RV64IZCMP-NEXT: lw t5, 44(a5) +; RV64IZCMP-NEXT: lw t6, 48(a5) +; RV64IZCMP-NEXT: lw s0, 52(a5) +; RV64IZCMP-NEXT: lw s1, 56(a5) +; RV64IZCMP-NEXT: lw s2, 60(a5) +; RV64IZCMP-NEXT: lw s3, 64(a5) +; RV64IZCMP-NEXT: lw s4, 68(a5) +; RV64IZCMP-NEXT: lw s5, 72(a5) +; RV64IZCMP-NEXT: lw s6, 76(a5) +; RV64IZCMP-NEXT: lw s7, 80(a5) +; RV64IZCMP-NEXT: lw s8, 84(a5) +; RV64IZCMP-NEXT: lw s9, 88(a5) +; RV64IZCMP-NEXT: lw s10, 92(a5) +; RV64IZCMP-NEXT: lw s11, 96(a5) +; RV64IZCMP-NEXT: lw ra, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a0, 124(a5) +; RV64IZCMP-NEXT: lw a1, 120(a5) +; RV64IZCMP-NEXT: lw a2, 116(a5) +; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: sw a0, 124(a5) +; RV64IZCMP-NEXT: sw a1, 120(a5) +; RV64IZCMP-NEXT: sw a2, 116(a5) +; RV64IZCMP-NEXT: sw a3, 112(a5) +; RV64IZCMP-NEXT: sw a4, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw ra, 100(a5) +; RV64IZCMP-NEXT: sw s11, 96(a5) +; RV64IZCMP-NEXT: sw s10, 92(a5) +; RV64IZCMP-NEXT: sw s9, 88(a5) +; RV64IZCMP-NEXT: sw s8, 84(a5) +; RV64IZCMP-NEXT: sw s7, 80(a5) +; RV64IZCMP-NEXT: sw s6, 76(a5) +; RV64IZCMP-NEXT: sw s5, 72(a5) +; RV64IZCMP-NEXT: sw s4, 68(a5) +; RV64IZCMP-NEXT: sw s3, 64(a5) +; RV64IZCMP-NEXT: sw s2, 60(a5) +; RV64IZCMP-NEXT: sw s1, 56(a5) +; RV64IZCMP-NEXT: sw s0, 52(a5) +; RV64IZCMP-NEXT: sw t6, 48(a5) +; RV64IZCMP-NEXT: sw t5, 44(a5) +; RV64IZCMP-NEXT: sw t4, 40(a5) +; RV64IZCMP-NEXT: sw t3, 36(a5) +; RV64IZCMP-NEXT: sw t2, 32(a5) +; RV64IZCMP-NEXT: sw t1, 28(a5) +; RV64IZCMP-NEXT: sw t0, 24(a5) +; RV64IZCMP-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(a5) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(a5) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7) 
+; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64IZCMP-NEXT: ld t6, 100(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t5, 108(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t4, 116(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t3, 124(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a7, 132(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a6, 140(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a5, 148(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a4, 156(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a3, 164(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a2, 172(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a1, 180(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a0, 188(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t2, 196(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t1, 204(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t0, 212(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: addi sp, sp, 112 +; RV64IZCMP-NEXT: cm.pop {ra, s0-s11}, 160 +; RV64IZCMP-NEXT: uret +; +; RV32I-LABEL: callee_with_irq: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -144 +; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t0, 136(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t1, 132(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t2, 128(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a0, 116(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a2, 108(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a3, 104(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a4, 100(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a5, 96(sp) # 4-byte Folded Spill +; 
RV32I-NEXT: sw a6, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a7, 88(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 84(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 80(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t3, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t4, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t5, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t6, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a7, %hi(var_test_irq) +; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32I-NEXT: lw a0, 16(a5) +; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 20(a5) +; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw t0, 24(a5) +; RV32I-NEXT: lw t1, 28(a5) +; RV32I-NEXT: lw t2, 32(a5) +; RV32I-NEXT: lw t3, 36(a5) +; RV32I-NEXT: lw t4, 40(a5) +; RV32I-NEXT: lw t5, 44(a5) +; RV32I-NEXT: lw t6, 48(a5) +; RV32I-NEXT: lw s0, 52(a5) +; RV32I-NEXT: lw s1, 56(a5) +; RV32I-NEXT: lw s2, 60(a5) +; RV32I-NEXT: lw s3, 64(a5) +; RV32I-NEXT: lw s4, 68(a5) +; RV32I-NEXT: lw s5, 72(a5) +; RV32I-NEXT: lw s6, 76(a5) +; RV32I-NEXT: lw s7, 80(a5) +; RV32I-NEXT: lw s8, 84(a5) +; RV32I-NEXT: lw s9, 88(a5) +; RV32I-NEXT: lw s10, 92(a5) +; RV32I-NEXT: lw s11, 
96(a5) +; RV32I-NEXT: lw ra, 100(a5) +; RV32I-NEXT: lw a6, 104(a5) +; RV32I-NEXT: lw a4, 108(a5) +; RV32I-NEXT: lw a0, 124(a5) +; RV32I-NEXT: lw a1, 120(a5) +; RV32I-NEXT: lw a2, 116(a5) +; RV32I-NEXT: lw a3, 112(a5) +; RV32I-NEXT: sw a0, 124(a5) +; RV32I-NEXT: sw a1, 120(a5) +; RV32I-NEXT: sw a2, 116(a5) +; RV32I-NEXT: sw a3, 112(a5) +; RV32I-NEXT: sw a4, 108(a5) +; RV32I-NEXT: sw a6, 104(a5) +; RV32I-NEXT: sw ra, 100(a5) +; RV32I-NEXT: sw s11, 96(a5) +; RV32I-NEXT: sw s10, 92(a5) +; RV32I-NEXT: sw s9, 88(a5) +; RV32I-NEXT: sw s8, 84(a5) +; RV32I-NEXT: sw s7, 80(a5) +; RV32I-NEXT: sw s6, 76(a5) +; RV32I-NEXT: sw s5, 72(a5) +; RV32I-NEXT: sw s4, 68(a5) +; RV32I-NEXT: sw s3, 64(a5) +; RV32I-NEXT: sw s2, 60(a5) +; RV32I-NEXT: sw s1, 56(a5) +; RV32I-NEXT: sw s0, 52(a5) +; RV32I-NEXT: sw t6, 48(a5) +; RV32I-NEXT: sw t5, 44(a5) +; RV32I-NEXT: sw t4, 40(a5) +; RV32I-NEXT: sw t3, 36(a5) +; RV32I-NEXT: sw t2, 32(a5) +; RV32I-NEXT: sw t1, 28(a5) +; RV32I-NEXT: sw t0, 24(a5) +; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 20(a5) +; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 16(a5) +; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t0, 136(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t1, 132(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t2, 128(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a0, 116(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a1, 112(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a2, 108(sp) # 4-byte 
Folded Reload +; RV32I-NEXT: lw a3, 104(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a4, 100(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a5, 96(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a6, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a7, 88(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 84(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 80(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t3, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t4, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t5, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t6, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 144 +; RV32I-NEXT: uret +; +; RV64I-LABEL: callee_with_irq: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -272 +; RV64I-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t0, 256(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t1, 248(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t2, 240(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 232(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 224(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a0, 216(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a2, 200(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a3, 192(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a4, 184(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a5, 176(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a6, 168(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a7, 160(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 144(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 
136(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s9, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s10, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s11, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t3, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t4, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t5, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t6, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a7, %hi(var_test_irq) +; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64I-NEXT: lw a0, 16(a5) +; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 20(a5) +; RV64I-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw t0, 24(a5) +; RV64I-NEXT: lw t1, 28(a5) +; RV64I-NEXT: lw t2, 32(a5) +; RV64I-NEXT: lw t3, 36(a5) +; RV64I-NEXT: lw t4, 40(a5) +; RV64I-NEXT: lw t5, 44(a5) +; RV64I-NEXT: lw t6, 48(a5) +; RV64I-NEXT: lw s0, 52(a5) +; RV64I-NEXT: lw s1, 56(a5) +; RV64I-NEXT: lw s2, 60(a5) +; RV64I-NEXT: lw s3, 64(a5) +; RV64I-NEXT: lw s4, 68(a5) +; RV64I-NEXT: lw s5, 72(a5) +; RV64I-NEXT: lw s6, 76(a5) +; RV64I-NEXT: lw s7, 80(a5) +; RV64I-NEXT: lw s8, 84(a5) +; RV64I-NEXT: lw s9, 88(a5) +; RV64I-NEXT: lw s10, 92(a5) +; RV64I-NEXT: lw s11, 96(a5) +; RV64I-NEXT: lw ra, 100(a5) +; RV64I-NEXT: lw a6, 104(a5) +; RV64I-NEXT: lw a4, 108(a5) +; RV64I-NEXT: lw a0, 124(a5) +; RV64I-NEXT: lw a1, 120(a5) +; RV64I-NEXT: lw a2, 116(a5) +; RV64I-NEXT: lw a3, 112(a5) +; 
RV64I-NEXT: sw a0, 124(a5) +; RV64I-NEXT: sw a1, 120(a5) +; RV64I-NEXT: sw a2, 116(a5) +; RV64I-NEXT: sw a3, 112(a5) +; RV64I-NEXT: sw a4, 108(a5) +; RV64I-NEXT: sw a6, 104(a5) +; RV64I-NEXT: sw ra, 100(a5) +; RV64I-NEXT: sw s11, 96(a5) +; RV64I-NEXT: sw s10, 92(a5) +; RV64I-NEXT: sw s9, 88(a5) +; RV64I-NEXT: sw s8, 84(a5) +; RV64I-NEXT: sw s7, 80(a5) +; RV64I-NEXT: sw s6, 76(a5) +; RV64I-NEXT: sw s5, 72(a5) +; RV64I-NEXT: sw s4, 68(a5) +; RV64I-NEXT: sw s3, 64(a5) +; RV64I-NEXT: sw s2, 60(a5) +; RV64I-NEXT: sw s1, 56(a5) +; RV64I-NEXT: sw s0, 52(a5) +; RV64I-NEXT: sw t6, 48(a5) +; RV64I-NEXT: sw t5, 44(a5) +; RV64I-NEXT: sw t4, 40(a5) +; RV64I-NEXT: sw t3, 36(a5) +; RV64I-NEXT: sw t2, 32(a5) +; RV64I-NEXT: sw t1, 28(a5) +; RV64I-NEXT: sw t0, 24(a5) +; RV64I-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 20(a5) +; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 16(a5) +; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t0, 256(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t1, 248(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t2, 240(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 232(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 224(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a2, 200(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a3, 192(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a4, 184(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a6, 168(sp) # 8-byte Folded Reload 
+; RV64I-NEXT: ld a7, 160(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 136(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s9, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s10, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s11, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t3, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t4, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t5, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t6, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 272 +; RV64I-NEXT: uret + %val = load [32 x i32], [32 x i32]* @var_test_irq + store volatile [32 x i32] %val, [32 x i32]* @var_test_irq + ret void +} + +define void @callee_no_irq() nounwind{ +; RV32IZCMP-LABEL: callee_no_irq: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -80 +; RV32IZCMP-NEXT: lui a7, %hi(var_test_irq) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32IZCMP-NEXT: lw a0, 16(a5) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(a5) +; RV32IZCMP-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw t0, 24(a5) +; RV32IZCMP-NEXT: lw t1, 28(a5) +; RV32IZCMP-NEXT: lw t2, 32(a5) +; RV32IZCMP-NEXT: lw t3, 36(a5) +; RV32IZCMP-NEXT: lw t4, 40(a5) +; RV32IZCMP-NEXT: lw t5, 44(a5) 
+; RV32IZCMP-NEXT: lw t6, 48(a5) +; RV32IZCMP-NEXT: lw s0, 52(a5) +; RV32IZCMP-NEXT: lw s1, 56(a5) +; RV32IZCMP-NEXT: lw s2, 60(a5) +; RV32IZCMP-NEXT: lw s3, 64(a5) +; RV32IZCMP-NEXT: lw s4, 68(a5) +; RV32IZCMP-NEXT: lw s5, 72(a5) +; RV32IZCMP-NEXT: lw s6, 76(a5) +; RV32IZCMP-NEXT: lw s7, 80(a5) +; RV32IZCMP-NEXT: lw s8, 84(a5) +; RV32IZCMP-NEXT: lw s9, 88(a5) +; RV32IZCMP-NEXT: lw s10, 92(a5) +; RV32IZCMP-NEXT: lw s11, 96(a5) +; RV32IZCMP-NEXT: lw ra, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a0, 124(a5) +; RV32IZCMP-NEXT: lw a1, 120(a5) +; RV32IZCMP-NEXT: lw a2, 116(a5) +; RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: sw a0, 124(a5) +; RV32IZCMP-NEXT: sw a1, 120(a5) +; RV32IZCMP-NEXT: sw a2, 116(a5) +; RV32IZCMP-NEXT: sw a3, 112(a5) +; RV32IZCMP-NEXT: sw a4, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) +; RV32IZCMP-NEXT: sw ra, 100(a5) +; RV32IZCMP-NEXT: sw s11, 96(a5) +; RV32IZCMP-NEXT: sw s10, 92(a5) +; RV32IZCMP-NEXT: sw s9, 88(a5) +; RV32IZCMP-NEXT: sw s8, 84(a5) +; RV32IZCMP-NEXT: sw s7, 80(a5) +; RV32IZCMP-NEXT: sw s6, 76(a5) +; RV32IZCMP-NEXT: sw s5, 72(a5) +; RV32IZCMP-NEXT: sw s4, 68(a5) +; RV32IZCMP-NEXT: sw s3, 64(a5) +; RV32IZCMP-NEXT: sw s2, 60(a5) +; RV32IZCMP-NEXT: sw s1, 56(a5) +; RV32IZCMP-NEXT: sw s0, 52(a5) +; RV32IZCMP-NEXT: sw t6, 48(a5) +; RV32IZCMP-NEXT: sw t5, 44(a5) +; RV32IZCMP-NEXT: sw t4, 40(a5) +; RV32IZCMP-NEXT: sw t3, 36(a5) +; RV32IZCMP-NEXT: sw t2, 32(a5) +; RV32IZCMP-NEXT: sw t1, 28(a5) +; RV32IZCMP-NEXT: sw t0, 24(a5) +; RV32IZCMP-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(a5) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(a5) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: 
sw a0, %lo(var_test_irq+4)(a7) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80 +; +; RV64IZCMP-LABEL: callee_no_irq: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: lui a7, %hi(var_test_irq) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64IZCMP-NEXT: lw a0, 16(a5) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 20(a5) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw t0, 24(a5) +; RV64IZCMP-NEXT: lw t1, 28(a5) +; RV64IZCMP-NEXT: lw t2, 32(a5) +; RV64IZCMP-NEXT: lw t3, 36(a5) +; RV64IZCMP-NEXT: lw t4, 40(a5) +; RV64IZCMP-NEXT: lw t5, 44(a5) +; RV64IZCMP-NEXT: lw t6, 48(a5) +; RV64IZCMP-NEXT: lw s0, 52(a5) +; RV64IZCMP-NEXT: lw s1, 56(a5) +; RV64IZCMP-NEXT: lw s2, 60(a5) +; RV64IZCMP-NEXT: lw s3, 64(a5) +; RV64IZCMP-NEXT: lw s4, 68(a5) +; RV64IZCMP-NEXT: lw s5, 72(a5) +; RV64IZCMP-NEXT: lw s6, 76(a5) +; RV64IZCMP-NEXT: lw s7, 80(a5) +; RV64IZCMP-NEXT: lw s8, 84(a5) +; RV64IZCMP-NEXT: lw s9, 88(a5) +; RV64IZCMP-NEXT: lw s10, 92(a5) +; RV64IZCMP-NEXT: lw s11, 96(a5) +; RV64IZCMP-NEXT: lw ra, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a0, 124(a5) +; RV64IZCMP-NEXT: lw a1, 120(a5) +; RV64IZCMP-NEXT: lw a2, 116(a5) +; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: sw a0, 124(a5) +; RV64IZCMP-NEXT: sw a1, 120(a5) +; RV64IZCMP-NEXT: sw a2, 116(a5) +; RV64IZCMP-NEXT: sw a3, 112(a5) +; RV64IZCMP-NEXT: 
sw a4, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw ra, 100(a5) +; RV64IZCMP-NEXT: sw s11, 96(a5) +; RV64IZCMP-NEXT: sw s10, 92(a5) +; RV64IZCMP-NEXT: sw s9, 88(a5) +; RV64IZCMP-NEXT: sw s8, 84(a5) +; RV64IZCMP-NEXT: sw s7, 80(a5) +; RV64IZCMP-NEXT: sw s6, 76(a5) +; RV64IZCMP-NEXT: sw s5, 72(a5) +; RV64IZCMP-NEXT: sw s4, 68(a5) +; RV64IZCMP-NEXT: sw s3, 64(a5) +; RV64IZCMP-NEXT: sw s2, 60(a5) +; RV64IZCMP-NEXT: sw s1, 56(a5) +; RV64IZCMP-NEXT: sw s0, 52(a5) +; RV64IZCMP-NEXT: sw t6, 48(a5) +; RV64IZCMP-NEXT: sw t5, 44(a5) +; RV64IZCMP-NEXT: sw t4, 40(a5) +; RV64IZCMP-NEXT: sw t3, 36(a5) +; RV64IZCMP-NEXT: sw t2, 32(a5) +; RV64IZCMP-NEXT: sw t1, 28(a5) +; RV64IZCMP-NEXT: sw t0, 24(a5) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(a5) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(a5) +; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 +; +; RV32I-LABEL: callee_no_irq: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded 
Spill +; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a7, %hi(var_test_irq) +; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32I-NEXT: lw a0, 16(a5) +; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 20(a5) +; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw t0, 24(a5) +; RV32I-NEXT: lw t1, 28(a5) +; RV32I-NEXT: lw t2, 32(a5) +; RV32I-NEXT: lw t3, 36(a5) +; RV32I-NEXT: lw t4, 40(a5) +; RV32I-NEXT: lw t5, 44(a5) +; RV32I-NEXT: lw t6, 48(a5) +; RV32I-NEXT: lw s0, 52(a5) +; RV32I-NEXT: lw s1, 56(a5) +; RV32I-NEXT: lw s2, 60(a5) +; RV32I-NEXT: lw s3, 64(a5) +; RV32I-NEXT: lw s4, 68(a5) +; RV32I-NEXT: lw s5, 72(a5) +; RV32I-NEXT: lw s6, 76(a5) +; RV32I-NEXT: lw s7, 80(a5) +; RV32I-NEXT: lw s8, 84(a5) +; RV32I-NEXT: lw s9, 88(a5) +; RV32I-NEXT: lw s10, 92(a5) +; RV32I-NEXT: lw s11, 96(a5) +; RV32I-NEXT: lw ra, 100(a5) +; RV32I-NEXT: lw a6, 104(a5) +; RV32I-NEXT: lw a4, 108(a5) +; RV32I-NEXT: lw a0, 124(a5) +; RV32I-NEXT: lw a1, 120(a5) +; RV32I-NEXT: lw a2, 116(a5) +; RV32I-NEXT: lw a3, 112(a5) +; RV32I-NEXT: sw a0, 124(a5) +; RV32I-NEXT: sw a1, 120(a5) +; RV32I-NEXT: sw a2, 116(a5) +; RV32I-NEXT: sw a3, 112(a5) +; RV32I-NEXT: sw a4, 108(a5) +; RV32I-NEXT: sw a6, 104(a5) +; RV32I-NEXT: sw ra, 100(a5) +; RV32I-NEXT: sw s11, 96(a5) +; RV32I-NEXT: sw s10, 92(a5) +; RV32I-NEXT: sw s9, 88(a5) +; RV32I-NEXT: sw s8, 84(a5) +; RV32I-NEXT: sw s7, 80(a5) +; RV32I-NEXT: sw s6, 76(a5) +; RV32I-NEXT: sw s5, 72(a5) +; RV32I-NEXT: sw s4, 68(a5) +; RV32I-NEXT: sw s3, 64(a5) +; RV32I-NEXT: sw s2, 
60(a5) +; RV32I-NEXT: sw s1, 56(a5) +; RV32I-NEXT: sw s0, 52(a5) +; RV32I-NEXT: sw t6, 48(a5) +; RV32I-NEXT: sw t5, 44(a5) +; RV32I-NEXT: sw t4, 40(a5) +; RV32I-NEXT: sw t3, 36(a5) +; RV32I-NEXT: sw t2, 32(a5) +; RV32I-NEXT: sw t1, 28(a5) +; RV32I-NEXT: sw t0, 24(a5) +; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 20(a5) +; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 16(a5) +; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 +; RV32I-NEXT: ret +; +; RV64I-LABEL: callee_no_irq: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -160 +; RV64I-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 144(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 136(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 104(sp) # 8-byte Folded Spill +; 
RV64I-NEXT: sd s6, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a7, %hi(var_test_irq) +; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64I-NEXT: lw a0, 16(a5) +; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 20(a5) +; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw t0, 24(a5) +; RV64I-NEXT: lw t1, 28(a5) +; RV64I-NEXT: lw t2, 32(a5) +; RV64I-NEXT: lw t3, 36(a5) +; RV64I-NEXT: lw t4, 40(a5) +; RV64I-NEXT: lw t5, 44(a5) +; RV64I-NEXT: lw t6, 48(a5) +; RV64I-NEXT: lw s0, 52(a5) +; RV64I-NEXT: lw s1, 56(a5) +; RV64I-NEXT: lw s2, 60(a5) +; RV64I-NEXT: lw s3, 64(a5) +; RV64I-NEXT: lw s4, 68(a5) +; RV64I-NEXT: lw s5, 72(a5) +; RV64I-NEXT: lw s6, 76(a5) +; RV64I-NEXT: lw s7, 80(a5) +; RV64I-NEXT: lw s8, 84(a5) +; RV64I-NEXT: lw s9, 88(a5) +; RV64I-NEXT: lw s10, 92(a5) +; RV64I-NEXT: lw s11, 96(a5) +; RV64I-NEXT: lw ra, 100(a5) +; RV64I-NEXT: lw a6, 104(a5) +; RV64I-NEXT: lw a4, 108(a5) +; RV64I-NEXT: lw a0, 124(a5) +; RV64I-NEXT: lw a1, 120(a5) +; RV64I-NEXT: lw a2, 116(a5) +; RV64I-NEXT: lw a3, 112(a5) +; RV64I-NEXT: sw a0, 124(a5) +; RV64I-NEXT: sw a1, 120(a5) +; RV64I-NEXT: sw a2, 116(a5) +; RV64I-NEXT: sw a3, 112(a5) +; RV64I-NEXT: sw a4, 108(a5) +; RV64I-NEXT: sw a6, 104(a5) +; RV64I-NEXT: sw ra, 100(a5) +; RV64I-NEXT: sw s11, 96(a5) +; RV64I-NEXT: sw s10, 92(a5) +; RV64I-NEXT: sw s9, 88(a5) 
+; RV64I-NEXT: sw s8, 84(a5) +; RV64I-NEXT: sw s7, 80(a5) +; RV64I-NEXT: sw s6, 76(a5) +; RV64I-NEXT: sw s5, 72(a5) +; RV64I-NEXT: sw s4, 68(a5) +; RV64I-NEXT: sw s3, 64(a5) +; RV64I-NEXT: sw s2, 60(a5) +; RV64I-NEXT: sw s1, 56(a5) +; RV64I-NEXT: sw s0, 52(a5) +; RV64I-NEXT: sw t6, 48(a5) +; RV64I-NEXT: sw t5, 44(a5) +; RV64I-NEXT: sw t4, 40(a5) +; RV64I-NEXT: sw t3, 36(a5) +; RV64I-NEXT: sw t2, 32(a5) +; RV64I-NEXT: sw t1, 28(a5) +; RV64I-NEXT: sw t0, 24(a5) +; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 20(a5) +; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 16(a5) +; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s9, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s10, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s11, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 160 +; RV64I-NEXT: ret + %val = load [32 x i32], [32 x i32]* @var_test_irq + store volatile [32 x i32] %val, [32 x i32]* @var_test_irq + ret void +}