diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -36,6 +36,8 @@
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
   RISCVRedundantCopyElimination.cpp
+  RISCVMoveOptimizer.cpp
+  RISCVPushPopOptimizer.cpp
   RISCVRegisterInfo.cpp
   RISCVRVVInitUndef.cpp
   RISCVSubtarget.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -72,6 +72,12 @@
 void initializeRISCVInitUndefPass(PassRegistry &);
 extern char &RISCVInitUndefID;
 
+FunctionPass *createRISCVMoveOptimizationPass();
+void initializeRISCVMoveOptPass(PassRegistry &);
+
+FunctionPass *createRISCVPushPopOptimizationPass();
+void initializeRISCVPushPopOptPass(PassRegistry &);
+
 InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
                                                     RISCVSubtarget &,
                                                     RISCVRegisterBankInfo &);
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -46,6 +46,9 @@
   bool hasBP(const MachineFunction &MF) const;
 
+  bool isCSIpushable(const MachineFunction &MF,
+                     const std::vector<CalleeSavedInfo> &CSI) const;
+
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -251,6 +251,98 @@
   return RestoreLibCalls[LibCallID];
 }
 
+// Return the encoded value for a PUSH/POP instruction, representing
+// the registers to store/load.
+static int getPushPopEncoding(const Register MaxReg) {
+  switch (MaxReg) {
+  default:
+    llvm_unreachable("Unexpected Reg for Push/Pop Inst");
+  case /*s11*/ RISCV::X27:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S11;
+  case /*s9*/ RISCV::X25:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S9;
+  case /*s8*/ RISCV::X24:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S8;
+  case /*s7*/ RISCV::X23:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S7;
+  case /*s6*/ RISCV::X22:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S6;
+  case /*s5*/ RISCV::X21:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S5;
+  case /*s4*/ RISCV::X20:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S4;
+  case /*s3*/ RISCV::X19:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S3;
+  case /*s2*/ RISCV::X18:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S2;
+  case /*s1*/ RISCV::X9:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0_S1;
+  case /*s0*/ RISCV::X8:
+    return llvm::RISCVZC::RLISTENCODE::RA_S0;
+  case /*ra*/ RISCV::X1:
+    return llvm::RISCVZC::RLISTENCODE::RA;
+  }
+}
+
+static void reallocPushStackFrame(MachineFunction &MF) {
+  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  std::vector<CalleeSavedInfo> CSI = MFI.getCalleeSavedInfo();
+  // Reallocate the stack frame objects that are not covered by PUSH.
+  int64_t NonePushStackOffset = -RVFI->getRVPushStackSize();
+  for (const auto &Entry : CSI) {
+    int FrameIdx = Entry.getFrameIdx();
+    Register Reg = Entry.getReg();
+    if (!(Reg == RISCV::X26 || RISCV::PGPRRegClass.contains(Reg))) {
+      NonePushStackOffset -= MFI.getObjectSize(Entry.getFrameIdx());
+      MFI.setObjectOffset(FrameIdx, NonePushStackOffset);
+    }
+  }
+}
+
+static uint64_t adjSPInPushPop(MachineBasicBlock::iterator MBBI,
+                               uint64_t StackAdj, bool isPop) {
+  // The spec allocates 2 bits to specify the number of extra 16 byte blocks.
+  uint32_t AvailableAdj = 48;
+  uint64_t RequiredAdj = StackAdj;
+
+  // Use the stack adjustment available in the Zc PUSH/POP instruction
+  // to allocate/deallocate space on the stack.
+  int OpNum = MBBI->getNumOperands();
+  auto &Operand = MBBI->getOperand(OpNum - 1);
+  int RegisterOffset = Operand.getImm();
+  RequiredAdj -= RegisterOffset;
+
+  if (RequiredAdj >= AvailableAdj) {
+    RequiredAdj -= AvailableAdj;
+    StackAdj = AvailableAdj;
+  } else {
+    // Round to the nearest 16 byte block able to fit RequiredAdj.
+    StackAdj = alignTo(RequiredAdj, 16);
+    RequiredAdj = 0;
+  }
+  Operand.setImm(StackAdj);
+  MBBI->setFlag(isPop ? MachineInstr::FrameDestroy : MachineInstr::FrameSetup);
+  return RequiredAdj;
+}
+
+// Checks if Zc PUSH/POP instructions can be used with the given CSI.
+bool RISCVFrameLowering::isCSIpushable(
+    const MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI) const {
+  if (!STI.hasStdExtZcmp() || CSI.empty() ||
+      MF.getTarget().Options.DisableFramePointerElim(MF))
+    return false;
+  for (auto &CS : CSI) {
+    Register Reg = CS.getReg();
+    const TargetRegisterClass *RC =
+        STI.getRegisterInfo()->getMinimalPhysRegClass(Reg);
+    if (RISCV::PGPRRegClass.hasSubClassEq(RC))
+      return true;
+  }
+  return false;
+}
+
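An aside on the arithmetic above, for reviewers: the interplay between the register-list base adjustment and the 2-bit extra adjustment can be checked with a small standalone sketch. This is not part of the patch; `splitStackAdj` is a hypothetical helper, and it assumes the RV32 Zcmp rule that the base adjustment is 4 bytes per saved register rounded up to a multiple of 16, with at most three extra 16-byte blocks (48 bytes) foldable into the instruction itself.

```cpp
#include <cstdint>
#include <cstdio>

struct SplitAdj {
  uint64_t FoldedIntoPush; // becomes cm.push's stack adjustment
  uint64_t Remaining;      // still needs a separate addi sp, sp, -N
};

// Mirrors getStackAdjBase + adjSPInPushPop for RV32 (4-byte save slots).
SplitAdj splitStackAdj(unsigned NumSavedRegs, uint64_t FrameSize) {
  uint64_t Base = (NumSavedRegs * 4 + 15) / 16 * 16;
  uint64_t Extra = FrameSize - Base; // the frame always covers the saves
  if (Extra >= 48)
    return {Base + 48, Extra - 48};
  uint64_t Rounded = (Extra + 15) / 16 * 16; // nearest 16-byte block
  return {Base + Rounded, 0};
}

int main() {
  // {ra, s0-s11} is 13 registers -> 52 bytes -> base 64. An 80-byte
  // frame folds the remaining 16 bytes into the push itself.
  SplitAdj S = splitStackAdj(13, 80);
  std::printf("push adj = %llu, remaining = %llu\n",
              (unsigned long long)S.FoldedIntoPush,
              (unsigned long long)S.Remaining);
  return 0;
}
```

This agrees with the `cm.push {ra, s0-s11}, -80` emitted in the RV32IZCMP tests further down: the 13 saved registers account for 64 bytes, and the last 16 bytes of the 80-byte frame ride along on the push, so no separate `addi sp` is needed.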
 // Return true if the specified function should have a dedicated frame
 // pointer register. This is true if frame pointer elimination is
 // disabled, if it needs dynamic stack realignment, if the function has
@@ -324,11 +416,11 @@
 // Returns the register used to hold the stack pointer.
 static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; }
 
-static SmallVector<CalleeSavedInfo, 8>
+static std::vector<CalleeSavedInfo>
 getNonLibcallCSI(const MachineFunction &MF,
                  const std::vector<CalleeSavedInfo> &CSI) {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;
+  std::vector<CalleeSavedInfo> NonLibcallCSI;
 
   for (auto &CS : CSI) {
     int FI = CS.getFrameIdx();
@@ -498,9 +590,39 @@
     RealStackSize = FirstSPAdjustAmount;
   }
 
-  // Allocate space on the stack if necessary.
-  RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-StackSize),
-                MachineInstr::FrameSetup, getStackAlign());
+  const auto &CSI = MFI.getCalleeSavedInfo();
+  bool PushEnabled = isCSIpushable(MF, CSI);
+  if (PushEnabled && (CSI.size() != 0)) {
+    // Check at what offset spilling of registers starts and allocate space
+    // before it.
+    int64_t preAdjustStack = 0;
+    for (auto CS : CSI) {
+      preAdjustStack =
+          std::min(preAdjustStack, -(MFI.getObjectOffset(CS.getFrameIdx()) +
+                                     MFI.getObjectSize(CS.getFrameIdx())));
+    }
+    if (preAdjustStack != 0)
+      RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+                    StackOffset::getFixed(-preAdjustStack),
+                    MachineInstr::FrameSetup, getStackAlign());
+    StackSize -= preAdjustStack;
+
+    // Use the stack adjustment available in the push instruction to allocate
+    // additional stack space.
+    StackSize = adjSPInPushPop(MBBI, StackSize, false);
+    if (StackSize != 0) {
+      RI->adjustReg(MBB, next_nodbg(MBBI, MBB.end()), DL, SPReg, SPReg,
+                    StackOffset::getFixed(-StackSize), MachineInstr::FrameSetup,
+                    getStackAlign());
+      MBBI = next_nodbg(MBBI, MBB.end());
+      reallocPushStackFrame(MF);
+    }
+  } else {
+    // Allocate space on the stack if necessary.
+    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+                  StackOffset::getFixed(-StackSize), MachineInstr::FrameSetup,
+                  getStackAlign());
+  }
 
   // Emit ".cfi_def_cfa_offset RealStackSize"
   unsigned CFIIndex = MF.addFrameInst(
@@ -509,15 +631,16 @@
       .addCFIIndex(CFIIndex)
       .setMIFlag(MachineInstr::FrameSetup);
 
-  const auto &CSI = MFI.getCalleeSavedInfo();
-
-  // The frame pointer is callee-saved, and code has been generated for us to
-  // save it to the stack. We need to skip over the storing of callee-saved
-  // registers as the frame pointer must be modified after it has been saved
-  // to the stack, not before.
-  // FIXME: assumes exactly one instruction is used to save each callee-saved
-  // register.
-  std::advance(MBBI, getNonLibcallCSI(MF, CSI).size());
+  if (PushEnabled)
+    std::advance(MBBI, 1);
+  else
+    // The frame pointer is callee-saved, and code has been generated for us to
+    // save it to the stack. We need to skip over the storing of callee-saved
+    // registers as the frame pointer must be modified after it has been saved
+    // to the stack, not before.
+    // FIXME: assumes exactly one instruction is used to save each callee-saved
+    // register.
+    std::advance(MBBI, getNonLibcallCSI(MF, CSI).size());
 
   // Iterate over list of callee-saved registers and emit .cfi_offset
   // directives.
@@ -670,7 +793,10 @@
   // FIXME: assumes exactly one instruction is used to restore each
   // callee-saved register.
   auto LastFrameDestroy = MBBI;
-  if (!CSI.empty())
+  bool PopEnabled = isCSIpushable(MF, CSI);
+  if (PopEnabled)
+    LastFrameDestroy = prev_nodbg(MBBI, MBB.begin());
+  else if (!CSI.empty())
     LastFrameDestroy = std::prev(MBBI, CSI.size());
 
   uint64_t StackSize = getStackSizeWithRVVPadding(MF);
@@ -716,8 +842,33 @@
     StackSize = FirstSPAdjustAmount;
 
   // Deallocate stack
-  RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
-                MachineInstr::FrameDestroy, getStackAlign());
+  if (PopEnabled) {
+    // Check at what offset spilling of registers starts and calculate space
+    // before it.
+    int64_t preAdjustSize = 0;
+    for (auto CS : CSI) {
+      preAdjustSize =
+          std::min(preAdjustSize, -(MFI.getObjectOffset(CS.getFrameIdx()) +
+                                    MFI.getObjectSize(CS.getFrameIdx())));
+    }
+    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+                  StackOffset::getFixed(preAdjustSize),
+                  MachineInstr::FrameDestroy, getStackAlign());
+    StackSize -= preAdjustSize;
+    if (preAdjustSize != 0)
+      MBBI = prev_nodbg(MBBI, MBB.begin());
+
+    // Use the stack adjustment available in the pop instruction to deallocate
+    // stack space.
+    StackSize = adjSPInPushPop(prev_nodbg(MBBI, MBB.begin()), StackSize, true);
+    if (StackSize != 0) {
+      RI->adjustReg(MBB, prev_nodbg(MBBI, MBB.begin()), DL, SPReg, SPReg,
+                    StackOffset::getFixed(StackSize),
+                    MachineInstr::FrameDestroy, getStackAlign());
+    }
+  } else
+    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
+                  MachineInstr::FrameDestroy, getStackAlign());
 
   // Emit epilogue for shadow call stack.
   emitSCSEpilogue(MF, MBB, MBBI, DL);
@@ -1253,26 +1404,68 @@
   if (MI != MBB.end() && !MI->isDebugInstr())
     DL = MI->getDebugLoc();
 
-  const char *SpillLibCall = getSpillLibCallName(*MF, CSI);
-  if (SpillLibCall) {
-    // Add spill libcall via non-callee-saved register t0.
-    BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5)
-        .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL)
-        .setMIFlag(MachineInstr::FrameSetup);
+  // Emit CM.PUSH with the base SpImm and evaluate the push stack size.
+  if (isCSIpushable(*MF, CSI.vec())) {
+    auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+    uint64_t PushStackSize = 0;
+    std::vector<CalleeSavedInfo> NonePushCSI;
+    Register MaxReg = RISCV::NoRegister;
+
+    for (auto &CS : CSI) {
+      Register Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      if (RISCV::PGPRRegClass.hasSubClassEq(RC)) {
+        if (Reg != RISCV::X27)
+          PushStackSize += 4;
+        MaxReg = std::max(MaxReg.id(), Reg.id());
+      } else if (Reg.id() == RISCV::X26) {
+        PushStackSize += 8;
+        MaxReg = RISCV::X27;
+      } else
+        NonePushCSI.push_back(CS);
+    }
+    RVFI->setRVPushStackSize(PushStackSize);
+
+    MachineInstrBuilder PushBuilder =
+        BuildMI(MBB, MI, DL, TII.get(RISCV::CM_PUSH));
+    // Use the encoded number to represent the registers to spill.
+    int RegEnc = getPushPopEncoding(MaxReg);
+    PushBuilder.addImm(RegEnc);
+    // Calculate the SpImm base adjustment; the SpImm field will be updated
+    // through adjSPInPushPop.
+    bool isRV64 = STI.getFeatureBits()[RISCV::Feature64Bit];
+    bool isEABI = false; // Reserved for future implementation
+    uint32_t SpImmBase = RISCVZC::getStackAdjBase(RegEnc, isRV64, isEABI);
+    PushBuilder.addImm(SpImmBase);
+
+    for (auto &CS : NonePushCSI) {
+      Register Reg = CS.getReg();
+      TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(),
+                              TRI->getMinimalPhysRegClass(Reg), TRI,
+                              Register());
+    }
+  } else {
+    const char *SpillLibCall = getSpillLibCallName(*MF, CSI);
+    if (SpillLibCall) {
+      // Add spill libcall via non-callee-saved register t0.
+ BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5) + .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL) + .setMIFlag(MachineInstr::FrameSetup); - // Add registers spilled in libcall as liveins. - for (auto &CS : CSI) - MBB.addLiveIn(CS.getReg()); - } + // Add registers spilled in libcall as liveins. + for (auto &CS : CSI) + MBB.addLiveIn(CS.getReg()); + } - // Manually spill values not spilled by libcall. - const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : NonLibcallCSI) { - // Insert the spill to the stack frame. - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), - RC, TRI, Register()); + // Manually spill values not spilled by libcall. + const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); + for (auto &CS : NonLibcallCSI) { + // Insert the spill to the stack frame. + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), + CS.getFrameIdx(), RC, TRI, Register()); + } } return true; @@ -1290,37 +1483,65 @@ if (MI != MBB.end() && !MI->isDebugInstr()) DL = MI->getDebugLoc(); - // Manually restore values not restored by libcall. - // Keep the same order as in the prologue. There is no need to reverse the - // order in the epilogue. In addition, the return address will be restored - // first in the epilogue. It increases the opportunity to avoid the - // load-to-use data hazard between loading RA and return by RA. - // loadRegFromStackSlot can insert multiple instructions. - const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : NonLibcallCSI) { - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, - Register()); - assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); - } + if (isCSIpushable(*MF, CSI.vec())) { + Register MaxReg = RISCV::NoRegister; + + for (auto &CS : reverse(CSI)) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + if (RISCV::PGPRRegClass.hasSubClassEq(RC)) + MaxReg = std::max(MaxReg.id(), Reg.id()); + else if (Reg.id() == RISCV::X26) { + MaxReg = RISCV::X27; + } else + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI, + Register()); + } - const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI); - if (RestoreLibCall) { - // Add restore libcall via tail call. - MachineBasicBlock::iterator NewMI = - BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL)) - .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL) - .setMIFlag(MachineInstr::FrameDestroy); - - // Remove trailing returns, since the terminator is now a tail call to the - // restore function. - if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) { - NewMI->copyImplicitOps(*MF, *MI); - MI->eraseFromParent(); + MachineInstrBuilder PopBuilder = + BuildMI(MBB, MI, DL, TII.get(RISCV::CM_POP)); + // Use encoded number to represent registers to restore. + int RegEnc = getPushPopEncoding(MaxReg); + PopBuilder.addImm(RegEnc); + // Calculate SpImm Base adjustment, and SpImm field will be updated + // through adjSPInPushPop. 
+    bool isRV64 = STI.getFeatureBits()[RISCV::Feature64Bit];
+    bool isEABI = false; // Reserved for future implementation
+    uint32_t SpImmBase = RISCVZC::getStackAdjBase(RegEnc, isRV64, isEABI);
+    PopBuilder.addImm(SpImmBase);
+  } else {
+    // Manually restore values not restored by libcall.
+    // Keep the same order as in the prologue. There is no need to reverse the
+    // order in the epilogue. In addition, the return address will be restored
+    // first in the epilogue. It increases the opportunity to avoid the
+    // load-to-use data hazard between loading RA and return by RA.
+    // loadRegFromStackSlot can insert multiple instructions.
+    const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
+    for (auto &CS : NonLibcallCSI) {
+      Register Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
+                               Register());
+      assert(MI != MBB.begin() &&
             "loadRegFromStackSlot didn't insert any code!");
+    }
-  }
+    const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
+    if (RestoreLibCall) {
+      // Add restore libcall via tail call.
+      MachineBasicBlock::iterator NewMI =
+          BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL))
+              .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL)
+              .setMIFlag(MachineInstr::FrameDestroy);
+
+      // Remove trailing returns, since the terminator is now a tail call to
+      // the restore function.
+      if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
+        NewMI->copyImplicitOps(*MF, *MI);
+        MI->eraseFromParent();
+      }
+    }
+  }
   return true;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -126,6 +126,8 @@
   bool isAsCheapAsAMove(const MachineInstr &MI) const override;
 
+  std::optional<DestSourcePair> isLoadImmImpl(const MachineInstr &MI) const;
+
   std::optional<DestSourcePair>
   isCopyInstrImpl(const MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1271,6 +1271,24 @@
   return MI.isAsCheapAsAMove();
 }
 
+std::optional<DestSourcePair>
+RISCVInstrInfo::isLoadImmImpl(const MachineInstr &MI) const {
+  if (MI.isMoveImmediate())
+    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case RISCV::ADDIW:
+  case RISCV::ADDI:
+    // Operand 1 can be a frameindex but callers expect registers
+    if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
+        MI.getOperand(1).getReg() == RISCV::X0)
+      return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
+    break;
+  }
+  return std::nullopt;
+}
+
 std::optional<DestSourcePair>
 RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
   if (MI.isMoveReg())
@@ -1278,6 +1296,16 @@
   switch (MI.getOpcode()) {
   default:
     break;
+  case RISCV::ADD:
+    if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isReg())
+      break;
+    if ((MI.getOperand(1).getReg() == RISCV::X0) &&
+        (MI.getOperand(2).getReg() != RISCV::X0))
+      return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
+    if ((MI.getOperand(1).getReg() != RISCV::X0) &&
+        (MI.getOperand(2).getReg() == RISCV::X0))
+      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+    break;
   case RISCV::ADDI:
     // Operand 1 can be a frameindex but callers expect registers
     if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
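As background on the two TargetInstrInfo hooks just extended: on RISC-V, x0 is hard-wired to zero, so `add rd, x0, rs` and `add rd, rs, x0` are plain register copies, and `addi`/`addiw rd, x0, imm` materializes an immediate (this is what the `li` mnemonic expands to). A rough standalone restatement of the matching, using hypothetical plain-string register names rather than the LLVM API:

```cpp
#include <optional>
#include <string>

struct Copy { std::string Dst, Src; };

// Hypothetical mirror of the ADD patterns above (not LLVM code).
std::optional<Copy> matchCopyLike(const std::string &Opc,
                                  const std::string &Rd,
                                  const std::string &Rs1,
                                  const std::string &Rs2) {
  if (Opc == "add" && Rs1 == "x0" && Rs2 != "x0")
    return Copy{Rd, Rs2}; // add rd, x0, rs  ->  rd = rs
  if (Opc == "add" && Rs1 != "x0" && Rs2 == "x0")
    return Copy{Rd, Rs1}; // add rd, rs, x0  ->  rd = rs
  return std::nullopt;
}

// Hypothetical mirror of isLoadImmImpl: addi/addiw rd, x0, imm -> rd = imm.
std::optional<long> matchLoadImm(const std::string &Opc,
                                 const std::string &Rs1, long Imm) {
  if ((Opc == "addi" || Opc == "addiw") && Rs1 == "x0")
    return Imm;
  return std::nullopt;
}
```

The load-immediate case is what later lets the push/pop optimizer recognize `li a0, 0` ahead of a `cm.pop` and fold it into `cm.popretz`.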
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -71,6 +71,9 @@
   /// Registers that have been sign extended from i32.
   SmallVector<Register, 8> SExt32Registers;
 
+  /// Size of stack frame for zcmp PUSH/POP
+  unsigned RVPushStackSize = 0;
+
 public:
   RISCVMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {}
 
@@ -107,7 +110,8 @@
     // function uses a varargs save area, or is an interrupt handler.
     return MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() &&
            VarArgsSaveSize == 0 && !MF.getFrameInfo().hasTailCall() &&
-           !MF.getFunction().hasFnAttribute("interrupt");
+           !MF.getFunction().hasFnAttribute("interrupt") &&
+           !MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp();
   }
 
   uint64_t getRVVStackSize() const { return RVVStackSize; }
@@ -122,6 +126,9 @@
   unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
   void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
 
+  uint64_t getRVPushStackSize() const { return RVPushStackSize; }
+  void setRVPushStackSize(uint64_t Size) { RVPushStackSize = Size; }
+
   void initializeBaseYamlFields(const yaml::RISCVMachineFunctionInfo &YamlMFI);
 
   void addSExt32Register(Register Reg);
diff --git a/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp
@@ -0,0 +1,252 @@
+//===---------- RISCVMoveOptimizer.cpp - RISCV move opt. pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs move related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_MOVE_OPT_NAME "RISC-V Zc move merging pass"
+
+namespace {
+struct RISCVMoveOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVMoveOpt() : MachineFunctionPass(ID) {
+    initializeRISCVMoveOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  const RISCVInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const RISCVSubtarget *Subtarget;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+  bool isCandidateToMergeMVA01S(DestSourcePair &RegPair);
+  bool isCandidateToMergeMVSA01(DestSourcePair &RegPair);
+  // Merge the two instructions indicated into a single pair instruction.
+  MachineBasicBlock::iterator
+  mergePairedInsns(MachineBasicBlock::iterator I,
+                   MachineBasicBlock::iterator Paired, unsigned Opcode);
+
+  // Look for a C.MV instruction that can be combined with
+  // the given instruction into CM.MVA01S or CM.MVSA01. Return the matching
+  // instruction if one exists.
+  MachineBasicBlock::iterator
+  findMatchingInst(MachineBasicBlock::iterator &MBBI, unsigned InstOpcode);
+  bool MovOpt(MachineBasicBlock &MBB);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override { return RISCV_MOVE_OPT_NAME; }
+};
+
+char RISCVMoveOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVMoveOpt, "riscv-mov-opt", RISCV_MOVE_OPT_NAME, false,
+                false)
+
+// Check if registers meet CM.MVA01S constraints.
+bool RISCVMoveOpt::isCandidateToMergeMVA01S(DestSourcePair &RegPair) {
+  Register Destination = RegPair.Destination->getReg();
+  Register Source = RegPair.Source->getReg();
+  const TargetRegisterClass *SourceRC = TRI->getMinimalPhysRegClass(Source);
+  // The destination must be a0 or a1, and the source must be in s0-s7.
+  if (Destination == RISCV::X10 || Destination == RISCV::X11)
+    if (RISCV::SR07RegClass.hasSubClassEq(SourceRC))
+      return true;
+  return false;
+}
+
+// Check if registers meet CM.MVSA01 constraints.
+bool RISCVMoveOpt::isCandidateToMergeMVSA01(DestSourcePair &RegPair) {
+  Register Destination = RegPair.Destination->getReg();
+  Register Source = RegPair.Source->getReg();
+  const TargetRegisterClass *DestinationRC =
+      TRI->getMinimalPhysRegClass(Destination);
+  // The destination must be in s0-s7, and the source must be a0 or a1.
+  if (RISCV::SR07RegClass.hasSubClassEq(DestinationRC))
+    if (Source == RISCV::X10 || Source == RISCV::X11)
+      return true;
+  return false;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+                               MachineBasicBlock::iterator Paired,
+                               unsigned Opcode) {
+  const MachineOperand *Sreg1, *Sreg2;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+  DestSourcePair FirstPair = TII->isCopyInstrImpl(*I).value();
+  DestSourcePair PairedRegs = TII->isCopyInstrImpl(*Paired).value();
+  Register ARegInFirstPair = Opcode == RISCV::CM_MVA01S
+                                 ? FirstPair.Destination->getReg()
+                                 : FirstPair.Source->getReg();
+
+  if (NextI == Paired)
+    NextI = next_nodbg(NextI, E);
+  DebugLoc DL = I->getDebugLoc();
+
+  // The order of the S-registers depends on which instruction holds A0,
+  // not on the order of the register pair.
+  // e.g.
+  //   mv a1, s1
+  //   mv a0, s2 => cm.mva01s s2, s1
+  //
+  //   mv a0, s2
+  //   mv a1, s1 => cm.mva01s s2, s1
+  if (Opcode == RISCV::CM_MVA01S) {
+    Sreg1 =
+        ARegInFirstPair == RISCV::X10 ? FirstPair.Source : PairedRegs.Source;
+    Sreg2 =
+        ARegInFirstPair == RISCV::X10 ? PairedRegs.Source : FirstPair.Source;
+  } else {
+    Sreg1 = ARegInFirstPair == RISCV::X10 ? FirstPair.Destination
+                                          : PairedRegs.Destination;
+    Sreg2 = ARegInFirstPair == RISCV::X10 ? PairedRegs.Destination
+                                          : FirstPair.Destination;
+  }
+
+  BuildMI(*I->getParent(), I, DL, TII->get(Opcode)).add(*Sreg1).add(*Sreg2);
+
+  I->eraseFromParent();
+  Paired->eraseFromParent();
+  return NextI;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::findMatchingInst(MachineBasicBlock::iterator &MBBI,
+                               unsigned InstOpcode) {
+  MachineBasicBlock::iterator E = MBBI->getParent()->end();
+  DestSourcePair FirstPair = TII->isCopyInstrImpl(*MBBI).value();
+
+  // Track which register units have been modified and used between the first
+  // insn and the second insn.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  for (MachineBasicBlock::iterator I = next_nodbg(MBBI, E); I != E;
+       I = next_nodbg(I, E)) {
+
+    MachineInstr &MI = *I;
+
+    if (auto SecondPair = TII->isCopyInstrImpl(MI)) {
+      Register SourceReg = SecondPair->Source->getReg();
+      Register DestReg = SecondPair->Destination->getReg();
+
+      if (InstOpcode == RISCV::CM_MVA01S &&
+          isCandidateToMergeMVA01S(*SecondPair)) {
+        // The destination registers of the two copies must differ (one must
+        // write a0 and the other a1).
+        if ((FirstPair.Destination->getReg() == DestReg))
+          return E;
+
+        // If paired destination register was modified or used, there is no
+        // possibility of finding matching instruction so exit early.
+        if (!ModifiedRegUnits.available(DestReg) ||
+            !UsedRegUnits.available(DestReg))
+          return E;
+
+        // We need to check if the source register in the second paired
+        // instruction is defined in between.
+        if (ModifiedRegUnits.available(SourceReg))
+          return I;
+
+      } else if (InstOpcode == RISCV::CM_MVSA01 &&
+                 isCandidateToMergeMVSA01(*SecondPair)) {
+        if ((FirstPair.Source->getReg() == SourceReg) ||
+            (FirstPair.Destination->getReg() == DestReg))
+          return E;
+
+        if (!ModifiedRegUnits.available(SourceReg) ||
+            !UsedRegUnits.available(SourceReg))
+          return E;
+
+        // As for mvsa01, we need to make sure the dest register of the second
+        // paired instruction is not used in between, since we would move its
+        // definition ahead.
+        if (UsedRegUnits.available(DestReg))
+          return I;
+      }
+    }
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+  }
+  return E;
+}
+
+// Find instructions that could be represented as C.MV instructions and
+// merge them into CM.MVA01S or CM.MVSA01.
+bool RISCVMoveOpt::MovOpt(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+       MBBI != E;) {
+    // Check if the instruction can be compressed to a C.MV instruction. If it
+    // can, return the Dest/Src register pair.
+    auto RegPair = TII->isCopyInstrImpl(*MBBI);
+    if (RegPair.has_value()) {
+      unsigned Opcode = 0;
+
+      if (isCandidateToMergeMVA01S(*RegPair))
+        Opcode = RISCV::CM_MVA01S;
+      else if (isCandidateToMergeMVSA01(*RegPair))
+        Opcode = RISCV::CM_MVSA01;
+      else {
+        ++MBBI;
+        continue;
+      }
+
+      MachineBasicBlock::iterator Paired = findMatchingInst(MBBI, Opcode);
+      // If a matching instruction could be found, merge them.
+      if (Paired != E) {
+        MBBI = mergePairedInsns(MBBI, Paired, Opcode);
+        Modified = true;
+        continue;
+      }
+    }
+    ++MBBI;
+  }
+  return Modified;
+}
+
+bool RISCVMoveOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  Subtarget = &static_cast<const RISCVSubtarget &>(Fn.getSubtarget());
+  if (!Subtarget->hasStdExtZcmp()) {
+    return false;
+  }
+
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Resize the modified and used register unit trackers. We do this once
+  // per function and then clear the register units each time we optimize a
+  // move.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Modified = false;
+  for (auto &MBB : Fn) {
+    Modified |= MovOpt(MBB);
+  }
+  return Modified;
+}
+
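To summarize the pairing rule the pass implements, here is a minimal runnable model. It is deliberately simplified: ABI register names are plain strings, there is no liveness tracking between the two moves, and `mergeMVA01S` is a hypothetical name, not the pass API.

```cpp
#include <optional>
#include <string>
#include <utility>

struct Move { std::string Dst, Src; };

// Returns the merged operand order (the source writing a0 first), or
// nullopt if the two moves cannot form a cm.mva01s.
std::optional<std::pair<std::string, std::string>>
mergeMVA01S(const Move &A, const Move &B) {
  auto isA01 = [](const std::string &R) { return R == "a0" || R == "a1"; };
  auto isS07 = [](const std::string &R) {
    return R.size() == 2 && R[0] == 's' && R[1] >= '0' && R[1] <= '7';
  };
  // Destinations must be exactly {a0, a1}; both sources must be s0-s7.
  if (!isA01(A.Dst) || !isA01(B.Dst) || A.Dst == B.Dst ||
      !isS07(A.Src) || !isS07(B.Src))
    return std::nullopt;
  // cm.mva01s lists the source of a0 first, regardless of program order.
  return A.Dst == "a0" ? std::make_pair(A.Src, B.Src)
                       : std::make_pair(B.Src, A.Src);
}
```

The register-unit liveness checks that `findMatchingInst` performs are what make the real pass safe when other instructions sit between the two moves; the model above assumes the moves are adjacent.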
+/// createRISCVMoveOptimizationPass - returns an instance of the
+/// move optimization pass.
+FunctionPass *llvm::createRISCVMoveOptimizationPass() {
+  return new RISCVMoveOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
@@ -0,0 +1,168 @@
+//===------- RISCVPushPopOptimizer.cpp - RISCV Push/Pop opt. pass ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that modifies PUSH/POP instructions from the
+// Zcmp extension to use their non prolog/epilog related functionalities
+// and generates the POPRET instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_PUSH_POP_OPT_NAME "RISC-V Zc Push/Pop optimization pass"
+
+namespace {
+struct RISCVPushPopOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVPushPopOpt() : MachineFunctionPass(ID) {
+    initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  const RISCVInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const RISCVSubtarget *Subtarget;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+  bool usePopRet(MachineBasicBlock::iterator &MBBI);
+  bool adjustRetVal(MachineBasicBlock::iterator &MBBI);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  std::map<MachineInstr *, int64_t> retValMap;
+
+  StringRef getPassName() const override { return RISCV_PUSH_POP_OPT_NAME; }
+};
+
+char RISCVPushPopOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVPushPopOpt, "riscv-push-pop-opt", RISCV_PUSH_POP_OPT_NAME,
+                false, false)
+
+// Check if a POP instruction was inserted into the MBB and return an iterator
+// to it.
+static MachineBasicBlock::iterator containsPop(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end();
+       MBBI = next_nodbg(MBBI, MBB.end()))
+    if (MBBI->getOpcode() == RISCV::CM_POP)
+      return MBBI;
+
+  return MBB.end();
+}
+
+bool RISCVPushPopOpt::usePopRet(MachineBasicBlock::iterator &MBBI) {
+  MachineBasicBlock::iterator E = MBBI->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(MBBI, E);
+  // Since pseudo instruction lowering happens later in the pipeline,
+  // this will detect all ret instructions.
+  if (NextI->getOpcode() == RISCV::PseudoRET) {
+    DebugLoc DL = NextI->getDebugLoc();
+    auto retValInfo = retValMap.find(&(*MBBI));
+    if (retValInfo == retValMap.end())
+      BuildMI(*NextI->getParent(), NextI, DL, TII->get(RISCV::CM_POPRET))
+          .add(MBBI->getOperand(0))
+          .add(MBBI->getOperand(1));
+    else if (retValInfo->second == 0)
+      BuildMI(*NextI->getParent(), NextI, DL, TII->get(RISCV::CM_POPRETZ))
+          .add(MBBI->getOperand(0))
+          .add(MBBI->getOperand(1));
+    // If the return value is not 0 then POPRETZ is not used.
+    else
+      return false;
+    MBBI->eraseFromParent();
+    NextI->eraseFromParent();
+    return true;
+  }
+  return false;
+}
+
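The decision usePopRet makes can be restated as a tiny runnable model (a sketch, not LLVM code; the real pass consults retValMap, which adjustRetVal below fills in):

```cpp
#include <optional>
#include <string>

// Given whether the cm.pop is followed by a plain return, and the value,
// if any, proven to be a0's final assignment, pick the replacement:
//   cm.pop + ret              -> cm.popret
//   li a0, 0 + cm.pop + ret   -> cm.popretz
std::string foldPopWithRet(bool FollowedByRet,
                           std::optional<long> ProvenRetVal) {
  if (!FollowedByRet)
    return "cm.pop";    // nothing to fold
  if (!ProvenRetVal)
    return "cm.popret"; // pop + ret, no known return value
  if (*ProvenRetVal == 0)
    return "cm.popretz"; // zero return value rides along
  return "cm.pop";      // nonzero return value: leave the pop alone
}
```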
+// Search for the last assignment to a0 and, if possible, use the ret_val slot
+// of POP to store the return value.
+bool RISCVPushPopOpt::adjustRetVal(MachineBasicBlock::iterator &MBBI) {
+  MachineBasicBlock::reverse_iterator RE = MBBI->getParent()->rend();
+  // Track which register units have been modified and used between the POP
+  // insn and the last assignment to register a0.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+  retValMap.clear();
+  Register A0(RISCV::X10);
+  // Since the POP instruction is in the epilogue, no normal instructions will
+  // follow it. Therefore search only previous ones to find the return value.
+  for (MachineBasicBlock::reverse_iterator I =
+           next_nodbg(MBBI.getReverse(), RE);
+       I != RE; I = next_nodbg(I, RE)) {
+    MachineInstr &MI = *I;
+    if (auto OperandPair = TII->isLoadImmImpl(MI)) {
+      Register DestReg = OperandPair->Destination->getReg();
+      int64_t RetVal = OperandPair->Source->getImm();
+      if (DestReg == RISCV::X10) {
+        switch (RetVal) {
+        default:
+          return false;
+        case 0:
+          retValMap[&(*MBBI)] = 0;
+        }
+        MI.removeFromParent();
+        return true;
+      }
+    }
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+    // If a0 was modified or used, there is no possibility
+    // of using the ret_val slot of the POP instruction.
+    if (!ModifiedRegUnits.available(A0) || !UsedRegUnits.available(A0))
+      return false;
+  }
+  return false;
+}
+
+bool RISCVPushPopOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  // If the Zcmp extension is not supported, abort.
+  Subtarget = &static_cast<const RISCVSubtarget &>(Fn.getSubtarget());
+  if (!Subtarget->hasStdExtZcmp()) {
+    return false;
+  }
+
+  // If frame pointer elimination has been disabled,
+  // abort to avoid breaking the ABI.
+  if (Fn.getTarget().Options.DisableFramePointerElim(Fn)) {
+    return false;
+  }
+
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Resize the modified and used register unit trackers. We do this once
+  // per function and then clear the register units each time we determine
+  // the correct return value for the POP.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Modified = false;
+  for (auto &MBB : Fn) {
+    auto MBBI = containsPop(MBB);
+    if (MBBI != MBB.end()) {
+      Modified |= adjustRetVal(MBBI);
+      if (MBB.isReturnBlock())
+        Modified |= usePopRet(MBBI);
+    }
+  }
+  return Modified;
+}
+
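adjustRetVal's backward walk can likewise be modeled in a few lines. Again a simplification with hypothetical field names: the real pass tracks register units via LiveRegUnits rather than per-instruction booleans, and it also deletes the matched `li`.

```cpp
#include <optional>
#include <vector>

struct Insn {
  bool DefsA0 = false, UsesA0 = false;
  std::optional<long> LoadsImmToA0; // set for "li a0, <imm>"
};

// Walk backwards from the cm.pop (the block's last instruction here).
// Succeed only on "li a0, 0"; give up as soon as anything else defines
// or reads a0, since a0 is then live for some other purpose.
bool canUsePopRetZ(const std::vector<Insn> &Block) {
  for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
    if (I->LoadsImmToA0)
      return *I->LoadsImmToA0 == 0; // only a zero return value qualifies
    if (I->DefsA0 || I->UsesA0)
      return false;
  }
  return false;
}
```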
+/// createRISCVPushPopOptimizationPass - returns an instance of the
+/// Push/Pop optimization pass.
+FunctionPass *llvm::createRISCVPushPopOptimizationPass() {
+  return new RISCVPushPopOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -183,6 +183,16 @@
   let RegInfos = XLenRI;
 }
 
+// Registers saveable by the PUSH instruction in the Zcmp extension
+def PGPR : RegisterClass<"RISCV", [XLenVT], 32, (add
+    (sequence "X%u", 8, 9),
+    (sequence "X%u", 18, 25),
+    X27, X1
+  )> {
+  let RegInfos = XLenRI;
+}
+
 // Floating point registers
 let RegAltNameIndices = [ABIRegAltName] in {
   def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -86,6 +86,8 @@
   initializeRISCVInsertVSETVLIPass(*PR);
   initializeRISCVDAGToDAGISelPass(*PR);
   initializeRISCVInitUndefPass(*PR);
+  initializeRISCVMoveOptPass(*PR);
+  initializeRISCVPushPopOptPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT) {
@@ -347,6 +349,10 @@
 }
 
 void RISCVPassConfig::addPreEmitPass2() {
+  addPass(createRISCVMoveOptimizationPass());
+  // Schedule the Push/Pop optimization before the expansion of pseudo
+  // instructions, ensuring the return instruction is detected correctly.
+  addPass(createRISCVPushPopOptimizationPass());
   addPass(createRISCVExpandPseudoPass());
 
   // Schedule the expansion of AMOs at the last possible moment, avoiding the
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -63,6 +63,8 @@
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       Stack Frame Layout Analysis
+; CHECK-NEXT:       RISC-V Zc move merging pass
+; CHECK-NEXT:       RISC-V Zc Push/Pop optimization pass
 ; CHECK-NEXT:       RISC-V pseudo instruction expansion pass
 ; CHECK-NEXT:       RISC-V atomic pseudo instruction expansion pass
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -176,6 +176,8 @@
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       Stack Frame Layout Analysis
+; CHECK-NEXT:       RISC-V Zc move merging pass
+; CHECK-NEXT:       RISC-V Zc Push/Pop optimization pass
 ; CHECK-NEXT:       RISC-V pseudo instruction expansion pass
 ; CHECK-NEXT:       RISC-V atomic pseudo instruction expansion pass
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -52,6 +52,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcb %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCB %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcd %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCD %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcf %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCF %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCMP %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zicsr %s -o - | FileCheck --check-prefixes=CHECK,RV32ZICSR %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zifencei
%s -o - | FileCheck --check-prefixes=CHECK,RV32ZIFENCEI %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfa %s -o - | FileCheck --check-prefixes=CHECK,RV32ZFA %s @@ -124,6 +125,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zca %s -o - | FileCheck --check-prefixes=CHECK,RV64ZCA %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcb %s -o - | FileCheck --check-prefixes=CHECK,RV64ZCB %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcd %s -o - | FileCheck --check-prefixes=CHECK,RV64ZCD %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp %s -o - | FileCheck --check-prefixes=CHECK,RV64ZCMP %s ; RUN: llc -mtriple=riscv64 -mattr=+zicsr %s -o - | FileCheck --check-prefixes=CHECK,RV64ZICSR %s ; RUN: llc -mtriple=riscv64 -mattr=+zifencei %s -o - | FileCheck --check-prefixes=CHECK,RV64ZIFENCEI %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfa %s -o - | FileCheck --check-prefixes=CHECK,RV64ZFA %s @@ -191,6 +193,7 @@ ; RV32ZCB: .attribute 5, "rv32i2p1_zca1p0_zcb1p0" ; RV32ZCD: .attribute 5, "rv32i2p1_zcd1p0" ; RV32ZCF: .attribute 5, "rv32i2p1_zcf1p0" +; RV32ZCMP: .attribute 5, "rv32i2p1_zca1p0_zcmp1p0" ; RV32ZICSR: .attribute 5, "rv32i2p1_zicsr2p0" ; RV32ZIFENCEI: .attribute 5, "rv32i2p1_zifencei2p0" ; RV32ZFA: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfa0p1" @@ -262,6 +265,7 @@ ; RV64ZCA: .attribute 5, "rv64i2p1_zca1p0" ; RV64ZCB: .attribute 5, "rv64i2p1_zca1p0_zcb1p0" ; RV64ZCD: .attribute 5, "rv64i2p1_zcd1p0" +; RV64ZCMP: .attribute 5, "rv64i2p1_zca1p0_zcmp1p0" ; RV64ZICSR: .attribute 5, "rv64i2p1_zicsr2p0" ; RV64ZIFENCEI: .attribute 5, "rv64i2p1_zifencei2p0" ; RV64ZFA: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfa0p1" diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -9,6 +9,10 @@ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -frame-pointer=all < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I-WITH-FP +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IZCMP +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IZCMP-WITH-FP ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \ @@ -19,6 +23,10 @@ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -frame-pointer=all < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I-WITH-FP +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64IZCMP +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64IZCMP-WITH-FP @var = global [32 x i32] zeroinitializer @@ -249,6 +257,202 @@ ; RV32I-WITH-FP-NEXT: addi sp, sp, 80 ; RV32I-WITH-FP-NEXT: ret ; +; RV32IZCMP-LABEL: callee: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -80 +; RV32IZCMP-NEXT: lui a7, %hi(var) +; RV32IZCMP-NEXT: lw a0, %lo(var)(a7) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+4)(a7) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+8)(a7) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill 
+; RV32IZCMP-NEXT: lw a0, %lo(var+12)(a7) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi a5, a7, %lo(var) +; RV32IZCMP-NEXT: lw a0, 16(a5) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(a5) +; RV32IZCMP-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw t4, 24(a5) +; RV32IZCMP-NEXT: lw t5, 28(a5) +; RV32IZCMP-NEXT: lw t6, 32(a5) +; RV32IZCMP-NEXT: lw s2, 36(a5) +; RV32IZCMP-NEXT: lw s3, 40(a5) +; RV32IZCMP-NEXT: lw s4, 44(a5) +; RV32IZCMP-NEXT: lw s5, 48(a5) +; RV32IZCMP-NEXT: lw s6, 52(a5) +; RV32IZCMP-NEXT: lw s7, 56(a5) +; RV32IZCMP-NEXT: lw s8, 60(a5) +; RV32IZCMP-NEXT: lw s9, 64(a5) +; RV32IZCMP-NEXT: lw s10, 68(a5) +; RV32IZCMP-NEXT: lw s11, 72(a5) +; RV32IZCMP-NEXT: lw ra, 76(a5) +; RV32IZCMP-NEXT: lw s1, 80(a5) +; RV32IZCMP-NEXT: lw t3, 84(a5) +; RV32IZCMP-NEXT: lw t2, 88(a5) +; RV32IZCMP-NEXT: lw t1, 92(a5) +; RV32IZCMP-NEXT: lw t0, 96(a5) +; RV32IZCMP-NEXT: lw s0, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a0, 124(a5) +; RV32IZCMP-NEXT: lw a1, 120(a5) +; RV32IZCMP-NEXT: lw a2, 116(a5) +; RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: sw a0, 124(a5) +; RV32IZCMP-NEXT: sw a1, 120(a5) +; RV32IZCMP-NEXT: sw a2, 116(a5) +; RV32IZCMP-NEXT: sw a3, 112(a5) +; RV32IZCMP-NEXT: sw a4, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) +; RV32IZCMP-NEXT: sw s0, 100(a5) +; RV32IZCMP-NEXT: sw t0, 96(a5) +; RV32IZCMP-NEXT: sw t1, 92(a5) +; RV32IZCMP-NEXT: sw t2, 88(a5) +; RV32IZCMP-NEXT: sw t3, 84(a5) +; RV32IZCMP-NEXT: sw s1, 80(a5) +; RV32IZCMP-NEXT: sw ra, 76(a5) +; RV32IZCMP-NEXT: sw s11, 72(a5) +; RV32IZCMP-NEXT: sw s10, 68(a5) +; RV32IZCMP-NEXT: sw s9, 64(a5) +; RV32IZCMP-NEXT: sw s8, 60(a5) +; RV32IZCMP-NEXT: sw s7, 56(a5) +; RV32IZCMP-NEXT: sw s6, 52(a5) +; RV32IZCMP-NEXT: sw s5, 48(a5) +; RV32IZCMP-NEXT: sw s4, 44(a5) +; RV32IZCMP-NEXT: sw s3, 40(a5) +; RV32IZCMP-NEXT: sw s2, 36(a5) +; RV32IZCMP-NEXT: sw t6, 32(a5) +; RV32IZCMP-NEXT: sw t5, 28(a5) +; RV32IZCMP-NEXT: sw t4, 24(a5) +; RV32IZCMP-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(a5) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(a5) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+12)(a7) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+8)(a7) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a7) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var)(a7) +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80 +; +; RV32IZCMP-WITH-FP-LABEL: callee: +; RV32IZCMP-WITH-FP: # %bb.0: +; RV32IZCMP-WITH-FP-NEXT: addi sp, sp, -80 +; RV32IZCMP-WITH-FP-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: 
sw s11, 28(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: addi s0, sp, 80 +; RV32IZCMP-WITH-FP-NEXT: lui a7, %hi(var) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a7) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a7) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a7) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a7) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: addi a5, a7, %lo(var) +; RV32IZCMP-WITH-FP-NEXT: lw a0, 16(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 20(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -76(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 24(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -80(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw t5, 28(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t6, 32(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s2, 36(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s3, 40(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s4, 44(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s5, 48(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s6, 52(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s7, 56(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s8, 60(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s9, 64(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s10, 68(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s11, 72(a5) +; RV32IZCMP-WITH-FP-NEXT: lw ra, 76(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t4, 80(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t3, 84(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t2, 88(a5) +; RV32IZCMP-WITH-FP-NEXT: lw s1, 92(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t1, 96(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t0, 100(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a6, 104(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a4, 108(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a0, 124(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a1, 120(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a2, 116(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a3, 112(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a0, 124(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a1, 120(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a2, 116(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a3, 112(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a4, 108(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a6, 104(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t0, 100(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t1, 96(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s1, 92(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t2, 88(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t3, 84(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t4, 80(a5) +; RV32IZCMP-WITH-FP-NEXT: sw ra, 76(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s11, 72(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s10, 68(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s9, 64(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s8, 60(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s7, 56(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s6, 52(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s5, 48(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s4, 44(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s3, 40(a5) +; RV32IZCMP-WITH-FP-NEXT: sw s2, 36(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t6, 32(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t5, 28(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -80(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 24(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -76(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 20(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 16(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a7) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a7) 
+; RV32IZCMP-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a7) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a7) +; RV32IZCMP-WITH-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: addi sp, sp, 80 +; RV32IZCMP-WITH-FP-NEXT: ret +; ; RV64I-LABEL: callee: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -160 @@ -471,6 +675,202 @@ ; RV64I-WITH-FP-NEXT: ld s11, 56(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: addi sp, sp, 160 ; RV64I-WITH-FP-NEXT: ret +; +; RV64IZCMP-LABEL: callee: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: lui a7, %hi(var) +; RV64IZCMP-NEXT: lw a0, %lo(var)(a7) +; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+4)(a7) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+8)(a7) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+12)(a7) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi a5, a7, %lo(var) +; RV64IZCMP-NEXT: lw a0, 16(a5) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 20(a5) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw t4, 24(a5) +; RV64IZCMP-NEXT: lw t5, 28(a5) +; RV64IZCMP-NEXT: lw t6, 32(a5) +; RV64IZCMP-NEXT: lw s2, 36(a5) +; RV64IZCMP-NEXT: lw s3, 40(a5) +; RV64IZCMP-NEXT: lw s4, 44(a5) +; RV64IZCMP-NEXT: lw s5, 48(a5) +; RV64IZCMP-NEXT: lw s6, 52(a5) +; RV64IZCMP-NEXT: lw s7, 56(a5) +; RV64IZCMP-NEXT: lw s8, 60(a5) +; RV64IZCMP-NEXT: lw s9, 64(a5) +; RV64IZCMP-NEXT: lw s10, 68(a5) +; RV64IZCMP-NEXT: lw s11, 72(a5) +; RV64IZCMP-NEXT: lw ra, 76(a5) +; RV64IZCMP-NEXT: lw s1, 80(a5) +; RV64IZCMP-NEXT: lw t3, 84(a5) +; RV64IZCMP-NEXT: lw t2, 88(a5) +; RV64IZCMP-NEXT: lw t1, 92(a5) +; RV64IZCMP-NEXT: lw t0, 96(a5) +; RV64IZCMP-NEXT: lw s0, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a0, 124(a5) +; RV64IZCMP-NEXT: lw a1, 120(a5) +; RV64IZCMP-NEXT: lw a2, 116(a5) +; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: sw a0, 124(a5) +; RV64IZCMP-NEXT: sw a1, 120(a5) +; RV64IZCMP-NEXT: sw a2, 116(a5) +; RV64IZCMP-NEXT: sw a3, 112(a5) +; RV64IZCMP-NEXT: sw a4, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw s0, 100(a5) +; RV64IZCMP-NEXT: sw t0, 96(a5) +; RV64IZCMP-NEXT: sw t1, 92(a5) +; RV64IZCMP-NEXT: sw t2, 88(a5) +; RV64IZCMP-NEXT: sw t3, 84(a5) +; RV64IZCMP-NEXT: sw s1, 80(a5) +; RV64IZCMP-NEXT: sw ra, 76(a5) +; RV64IZCMP-NEXT: sw s11, 72(a5) +; RV64IZCMP-NEXT: sw s10, 68(a5) +; RV64IZCMP-NEXT: sw s9, 64(a5) +; RV64IZCMP-NEXT: sw s8, 60(a5) +; RV64IZCMP-NEXT: sw s7, 
56(a5) +; RV64IZCMP-NEXT: sw s6, 52(a5) +; RV64IZCMP-NEXT: sw s5, 48(a5) +; RV64IZCMP-NEXT: sw s4, 44(a5) +; RV64IZCMP-NEXT: sw s3, 40(a5) +; RV64IZCMP-NEXT: sw s2, 36(a5) +; RV64IZCMP-NEXT: sw t6, 32(a5) +; RV64IZCMP-NEXT: sw t5, 28(a5) +; RV64IZCMP-NEXT: sw t4, 24(a5) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(a5) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(a5) +; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+12)(a7) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+8)(a7) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+4)(a7) +; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var)(a7) +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 +; +; RV64IZCMP-WITH-FP-LABEL: callee: +; RV64IZCMP-WITH-FP: # %bb.0: +; RV64IZCMP-WITH-FP-NEXT: addi sp, sp, -160 +; RV64IZCMP-WITH-FP-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s0, 144(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s1, 136(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s2, 128(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s3, 120(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s4, 112(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s5, 104(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s6, 96(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s7, 88(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s8, 80(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s9, 72(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s10, 64(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s11, 56(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: addi s0, sp, 160 +; RV64IZCMP-WITH-FP-NEXT: lui a7, %hi(var) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a7) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a7) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a7) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a7) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: addi a5, a7, %lo(var) +; RV64IZCMP-WITH-FP-NEXT: lw a0, 16(a5) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 20(a5) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -152(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 24(a5) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -160(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw t5, 28(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t6, 32(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s2, 36(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s3, 40(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s4, 44(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s5, 48(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s6, 52(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s7, 56(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s8, 60(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s9, 64(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s10, 68(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s11, 72(a5) +; RV64IZCMP-WITH-FP-NEXT: lw ra, 76(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t4, 80(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t3, 84(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t2, 88(a5) +; RV64IZCMP-WITH-FP-NEXT: lw s1, 92(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t1, 96(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t0, 100(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a6, 
104(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a4, 108(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a0, 124(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a1, 120(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a2, 116(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a3, 112(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a0, 124(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a1, 120(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a2, 116(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a3, 112(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a4, 108(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a6, 104(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t0, 100(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t1, 96(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s1, 92(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t2, 88(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t3, 84(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t4, 80(a5) +; RV64IZCMP-WITH-FP-NEXT: sw ra, 76(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s11, 72(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s10, 68(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s9, 64(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s8, 60(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s7, 56(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s6, 52(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s5, 48(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s4, 44(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s3, 40(a5) +; RV64IZCMP-WITH-FP-NEXT: sw s2, 36(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t6, 32(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t5, 28(a5) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -160(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 24(a5) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -152(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 20(a5) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 16(a5) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a7) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a7) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a7) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a7) +; RV64IZCMP-WITH-FP-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s0, 144(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s1, 136(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s2, 128(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s3, 120(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s4, 112(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s5, 104(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s6, 96(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s7, 88(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s8, 80(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s9, 72(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s10, 64(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s11, 56(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: addi sp, sp, 160 +; RV64IZCMP-WITH-FP-NEXT: ret %val = load [32 x i32], ptr @var store volatile [32 x i32] %val, ptr @var ret void @@ -769,6 +1169,270 @@ ; RV32I-WITH-FP-NEXT: addi sp, sp, 144 ; RV32I-WITH-FP-NEXT: ret ; +; RV32IZCMP-LABEL: caller: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -112 +; RV32IZCMP-NEXT: addi sp, sp, -32 +; RV32IZCMP-NEXT: lui s0, %hi(var) +; RV32IZCMP-NEXT: lw a0, %lo(var)(s0) +; RV32IZCMP-NEXT: sw a0, 88(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+4)(s0) +; RV32IZCMP-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+8)(s0) +; RV32IZCMP-NEXT: sw a0, 80(sp) 
# 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+12)(s0) +; RV32IZCMP-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi s1, s0, %lo(var) +; RV32IZCMP-NEXT: lw a0, 16(s1) +; RV32IZCMP-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(s1) +; RV32IZCMP-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 24(s1) +; RV32IZCMP-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 28(s1) +; RV32IZCMP-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 32(s1) +; RV32IZCMP-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 36(s1) +; RV32IZCMP-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 40(s1) +; RV32IZCMP-NEXT: sw a0, 48(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 44(s1) +; RV32IZCMP-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 48(s1) +; RV32IZCMP-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 52(s1) +; RV32IZCMP-NEXT: sw a0, 36(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 56(s1) +; RV32IZCMP-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 60(s1) +; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 64(s1) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 68(s1) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 72(s1) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 76(s1) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 80(s1) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 84(s1) +; RV32IZCMP-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw s4, 88(s1) +; RV32IZCMP-NEXT: lw s5, 92(s1) +; RV32IZCMP-NEXT: lw s6, 96(s1) +; RV32IZCMP-NEXT: lw s7, 100(s1) +; RV32IZCMP-NEXT: lw s8, 104(s1) +; RV32IZCMP-NEXT: lw s9, 108(s1) +; RV32IZCMP-NEXT: lw s10, 112(s1) +; RV32IZCMP-NEXT: lw s11, 116(s1) +; RV32IZCMP-NEXT: lw s2, 120(s1) +; RV32IZCMP-NEXT: lw s3, 124(s1) +; RV32IZCMP-NEXT: call callee@plt +; RV32IZCMP-NEXT: sw s3, 124(s1) +; RV32IZCMP-NEXT: sw s2, 120(s1) +; RV32IZCMP-NEXT: sw s11, 116(s1) +; RV32IZCMP-NEXT: sw s10, 112(s1) +; RV32IZCMP-NEXT: sw s9, 108(s1) +; RV32IZCMP-NEXT: sw s8, 104(s1) +; RV32IZCMP-NEXT: sw s7, 100(s1) +; RV32IZCMP-NEXT: sw s6, 96(s1) +; RV32IZCMP-NEXT: sw s5, 92(s1) +; RV32IZCMP-NEXT: sw s4, 88(s1) +; RV32IZCMP-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 84(s1) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 80(s1) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 76(s1) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 72(s1) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 68(s1) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 64(s1) +; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 60(s1) +; RV32IZCMP-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 56(s1) +; RV32IZCMP-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 52(s1) +; RV32IZCMP-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 48(s1) +; RV32IZCMP-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 44(s1) +; RV32IZCMP-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 40(s1) +; RV32IZCMP-NEXT: lw a0, 52(sp) 
# 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 36(s1) +; RV32IZCMP-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 32(s1) +; RV32IZCMP-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 28(s1) +; RV32IZCMP-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 24(s1) +; RV32IZCMP-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(s1) +; RV32IZCMP-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(s1) +; RV32IZCMP-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+12)(s0) +; RV32IZCMP-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+8)(s0) +; RV32IZCMP-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+4)(s0) +; RV32IZCMP-NEXT: lw a0, 88(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var)(s0) +; RV32IZCMP-NEXT: addi sp, sp, 32 +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 112 +; +; RV32IZCMP-WITH-FP-LABEL: caller: +; RV32IZCMP-WITH-FP: # %bb.0: +; RV32IZCMP-WITH-FP-NEXT: addi sp, sp, -144 +; RV32IZCMP-WITH-FP-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: addi s0, sp, 144 +; RV32IZCMP-WITH-FP-NEXT: lui s6, %hi(var) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(s6) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(s6) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(s6) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(s6) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: addi s1, s6, %lo(var) +; RV32IZCMP-WITH-FP-NEXT: lw a0, 16(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 20(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -76(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 24(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -80(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 28(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -84(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 32(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -88(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 36(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -92(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 40(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -96(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 44(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -100(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 48(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -104(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 52(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -108(s0) # 4-byte Folded 
Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 56(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -112(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 60(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -116(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 64(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -120(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 68(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -124(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 72(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -128(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 76(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -132(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 80(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -136(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 84(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -140(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw a0, 88(s1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, -144(s0) # 4-byte Folded Spill +; RV32IZCMP-WITH-FP-NEXT: lw s8, 92(s1) +; RV32IZCMP-WITH-FP-NEXT: lw s9, 96(s1) +; RV32IZCMP-WITH-FP-NEXT: lw s10, 100(s1) +; RV32IZCMP-WITH-FP-NEXT: lw s11, 104(s1) +; RV32IZCMP-WITH-FP-NEXT: lw s2, 108(s1) +; RV32IZCMP-WITH-FP-NEXT: lw s3, 112(s1) +; RV32IZCMP-WITH-FP-NEXT: lw s4, 116(s1) +; RV32IZCMP-WITH-FP-NEXT: lw s5, 120(s1) +; RV32IZCMP-WITH-FP-NEXT: lw s7, 124(s1) +; RV32IZCMP-WITH-FP-NEXT: call callee@plt +; RV32IZCMP-WITH-FP-NEXT: sw s7, 124(s1) +; RV32IZCMP-WITH-FP-NEXT: sw s5, 120(s1) +; RV32IZCMP-WITH-FP-NEXT: sw s4, 116(s1) +; RV32IZCMP-WITH-FP-NEXT: sw s3, 112(s1) +; RV32IZCMP-WITH-FP-NEXT: sw s2, 108(s1) +; RV32IZCMP-WITH-FP-NEXT: sw s11, 104(s1) +; RV32IZCMP-WITH-FP-NEXT: sw s10, 100(s1) +; RV32IZCMP-WITH-FP-NEXT: sw s9, 96(s1) +; RV32IZCMP-WITH-FP-NEXT: sw s8, 92(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -144(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 88(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -140(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 84(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -136(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 80(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -132(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 76(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -128(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 72(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -124(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 68(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -120(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 64(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -116(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 60(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -112(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 56(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -108(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 52(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -104(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 48(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -100(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 44(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -96(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 40(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -92(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 36(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -88(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 32(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -84(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 28(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -80(s0) # 4-byte Folded Reload +; 
RV32IZCMP-WITH-FP-NEXT: sw a0, 24(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -76(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 20(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, 16(s1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(s6) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(s6) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(s6) +; RV32IZCMP-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(s6) +; RV32IZCMP-WITH-FP-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IZCMP-WITH-FP-NEXT: addi sp, sp, 144 +; RV32IZCMP-WITH-FP-NEXT: ret +; ; RV64I-LABEL: caller: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -288 @@ -1057,7 +1721,270 @@ ; RV64I-WITH-FP-NEXT: ld s11, 184(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: addi sp, sp, 288 ; RV64I-WITH-FP-NEXT: ret - +; +; RV64IZCMP-LABEL: caller: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: addi sp, sp, -128 +; RV64IZCMP-NEXT: lui s0, %hi(var) +; RV64IZCMP-NEXT: lw a0, %lo(var)(s0) +; RV64IZCMP-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+4)(s0) +; RV64IZCMP-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+8)(s0) +; RV64IZCMP-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+12)(s0) +; RV64IZCMP-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi s1, s0, %lo(var) +; RV64IZCMP-NEXT: lw a0, 16(s1) +; RV64IZCMP-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 20(s1) +; RV64IZCMP-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 24(s1) +; RV64IZCMP-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 28(s1) +; RV64IZCMP-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 32(s1) +; RV64IZCMP-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 36(s1) +; RV64IZCMP-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 40(s1) +; RV64IZCMP-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 44(s1) +; RV64IZCMP-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 48(s1) +; RV64IZCMP-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 52(s1) +; RV64IZCMP-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 56(s1) +; RV64IZCMP-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 60(s1) +; RV64IZCMP-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 64(s1) +; 
RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 68(s1) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 72(s1) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 76(s1) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 80(s1) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 84(s1) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw s4, 88(s1) +; RV64IZCMP-NEXT: lw s5, 92(s1) +; RV64IZCMP-NEXT: lw s6, 96(s1) +; RV64IZCMP-NEXT: lw s7, 100(s1) +; RV64IZCMP-NEXT: lw s8, 104(s1) +; RV64IZCMP-NEXT: lw s9, 108(s1) +; RV64IZCMP-NEXT: lw s10, 112(s1) +; RV64IZCMP-NEXT: lw s11, 116(s1) +; RV64IZCMP-NEXT: lw s2, 120(s1) +; RV64IZCMP-NEXT: lw s3, 124(s1) +; RV64IZCMP-NEXT: call callee@plt +; RV64IZCMP-NEXT: sw s3, 124(s1) +; RV64IZCMP-NEXT: sw s2, 120(s1) +; RV64IZCMP-NEXT: sw s11, 116(s1) +; RV64IZCMP-NEXT: sw s10, 112(s1) +; RV64IZCMP-NEXT: sw s9, 108(s1) +; RV64IZCMP-NEXT: sw s8, 104(s1) +; RV64IZCMP-NEXT: sw s7, 100(s1) +; RV64IZCMP-NEXT: sw s6, 96(s1) +; RV64IZCMP-NEXT: sw s5, 92(s1) +; RV64IZCMP-NEXT: sw s4, 88(s1) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 84(s1) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 80(s1) +; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 76(s1) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 72(s1) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 68(s1) +; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 64(s1) +; RV64IZCMP-NEXT: ld a0, 56(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 60(s1) +; RV64IZCMP-NEXT: ld a0, 64(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 56(s1) +; RV64IZCMP-NEXT: ld a0, 72(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 52(s1) +; RV64IZCMP-NEXT: ld a0, 80(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 48(s1) +; RV64IZCMP-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 44(s1) +; RV64IZCMP-NEXT: ld a0, 96(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 40(s1) +; RV64IZCMP-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 36(s1) +; RV64IZCMP-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 32(s1) +; RV64IZCMP-NEXT: ld a0, 120(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 28(s1) +; RV64IZCMP-NEXT: ld a0, 128(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 24(s1) +; RV64IZCMP-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(s1) +; RV64IZCMP-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(s1) +; RV64IZCMP-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+12)(s0) +; RV64IZCMP-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+8)(s0) +; RV64IZCMP-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+4)(s0) +; RV64IZCMP-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var)(s0) +; RV64IZCMP-NEXT: addi sp, sp, 128 +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 +; +; RV64IZCMP-WITH-FP-LABEL: caller: +; RV64IZCMP-WITH-FP: # %bb.0: +; RV64IZCMP-WITH-FP-NEXT: addi sp, sp, -288 +; RV64IZCMP-WITH-FP-NEXT: sd ra, 280(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s0, 272(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: 
sd s1, 264(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s2, 256(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s3, 248(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s4, 240(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s5, 232(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s6, 224(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s7, 216(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s8, 208(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s9, 200(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s10, 192(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: sd s11, 184(sp) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: addi s0, sp, 288 +; RV64IZCMP-WITH-FP-NEXT: lui s6, %hi(var) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(s6) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(s6) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(s6) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(s6) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: addi s1, s6, %lo(var) +; RV64IZCMP-WITH-FP-NEXT: lw a0, 16(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 20(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -152(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 24(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -160(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 28(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -168(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 32(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -176(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 36(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -184(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 40(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -192(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 44(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -200(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 48(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -208(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 52(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -216(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 56(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -224(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 60(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -232(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 64(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -240(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 68(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -248(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 72(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -256(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 76(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -264(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 80(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -272(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 84(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -280(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw a0, 88(s1) +; RV64IZCMP-WITH-FP-NEXT: sd a0, -288(s0) # 8-byte Folded Spill +; RV64IZCMP-WITH-FP-NEXT: lw s8, 92(s1) +; RV64IZCMP-WITH-FP-NEXT: lw s9, 96(s1) +; RV64IZCMP-WITH-FP-NEXT: lw s10, 100(s1) +; RV64IZCMP-WITH-FP-NEXT: lw s11, 104(s1) +; RV64IZCMP-WITH-FP-NEXT: lw s2, 108(s1) +; RV64IZCMP-WITH-FP-NEXT: lw s3, 112(s1) +; 
RV64IZCMP-WITH-FP-NEXT: lw s4, 116(s1) +; RV64IZCMP-WITH-FP-NEXT: lw s5, 120(s1) +; RV64IZCMP-WITH-FP-NEXT: lw s7, 124(s1) +; RV64IZCMP-WITH-FP-NEXT: call callee@plt +; RV64IZCMP-WITH-FP-NEXT: sw s7, 124(s1) +; RV64IZCMP-WITH-FP-NEXT: sw s5, 120(s1) +; RV64IZCMP-WITH-FP-NEXT: sw s4, 116(s1) +; RV64IZCMP-WITH-FP-NEXT: sw s3, 112(s1) +; RV64IZCMP-WITH-FP-NEXT: sw s2, 108(s1) +; RV64IZCMP-WITH-FP-NEXT: sw s11, 104(s1) +; RV64IZCMP-WITH-FP-NEXT: sw s10, 100(s1) +; RV64IZCMP-WITH-FP-NEXT: sw s9, 96(s1) +; RV64IZCMP-WITH-FP-NEXT: sw s8, 92(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -288(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 88(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -280(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 84(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -272(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 80(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -264(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 76(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -256(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 72(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -248(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 68(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -240(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 64(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -232(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 60(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -224(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 56(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -216(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 52(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -208(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 48(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -200(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 44(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -192(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 40(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -184(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 36(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -176(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 32(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -168(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 28(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -160(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 24(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -152(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 20(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, 16(s1) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(s6) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(s6) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(s6) +; RV64IZCMP-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(s6) +; RV64IZCMP-WITH-FP-NEXT: ld ra, 280(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s0, 272(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s1, 264(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s2, 256(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s3, 248(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s4, 240(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s5, 232(sp) # 8-byte Folded Reload +; RV64IZCMP-WITH-FP-NEXT: ld s6, 224(sp) # 8-byte Folded 
Reload
+; RV64IZCMP-WITH-FP-NEXT: ld s7, 216(sp) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT: ld s8, 208(sp) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT: ld s9, 200(sp) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT: ld s10, 192(sp) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT: ld s11, 184(sp) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT: addi sp, sp, 288
+; RV64IZCMP-WITH-FP-NEXT: ret
 %val = load [32 x i32], ptr @var
 call void @callee()
 store volatile [32 x i32] %val, ptr @var
diff --git a/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll
@@ -0,0 +1,147 @@
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=VALID,VALID32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=VALID,VALID64
+
+
+; Function Attrs: nounwind
+define dso_local i32 @cmva(i32 %num, i32 %f, i32 %d, i32 %dx) local_unnamed_addr #0 {
+; VALID-LABEL: cmva:
+; VALID: cm.mva01s {{s[0-7]}}, {{s[0-7]}}
+; VALID-NOT: cm.mva01s {{a.}}, {{a.}}
+entry:
+ %mul = mul nsw i32 %dx, %d
+ %sub = sub nsw i32 %mul, %dx
+ %add = add nsw i32 %mul, %d
+ %mul2 = mul nsw i32 %sub, %dx
+ %add3 = add nsw i32 %add, %mul2
+ %mul4 = mul nsw i32 %add3, %d
+ %add6 = add nsw i32 %add3, %num
+ %add5 = add i32 %sub, %f
+ %add7 = add i32 %add5, %mul4
+ ret i32 %add7
+}
+
+declare i64 @llvm.cttz.i64(i64, i1 immarg)
+
+define i64 @cmvs32(i64 %a) nounwind {
+; VALID32-LABEL: cmvs32:
+; VALID32: cm.mvsa01 {{s[0-7]}}, {{s[0-7]}}
+; VALID32-NOT: cm.mvsa01 {{a.}}, {{a.}}
+ %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+ ret i64 %tmp
+}
+
+declare void @hoge()
+define void @cmvs64(i32 signext %arg, i32 signext %arg1) nounwind {
+; VALID64-LABEL: cmvs64:
+; VALID64: cm.mvsa01 {{s[0-7]}}, {{s[0-7]}}
+; VALID64-NOT: cm.mvsa01 {{a.}}, {{a.}}
+bb:
+ %tmp = icmp eq i32 %arg, %arg1
+ br i1 %tmp, label %bb6, label %bb2
+
+bb2: ; preds = %bb2, %bb
+ %tmp3 = phi i32 [ %tmp4, %bb2 ], [ %arg, %bb ]
+ tail call void @hoge()
+ %tmp4 = add nsw i32 %tmp3, 1
+ %tmp5 = icmp eq i32 %tmp4, %arg1
+ br i1 %tmp5, label %bb6, label %bb2
+
+bb6: ; preds = %bb2, %bb
+ ret void
+}
+
+%struct.trie = type { [26 x %struct.trie*], i8 }
+
+@word = external global i8*
+
+declare i32 @trie_new(%struct.trie*)
+declare i32 @trie_search(i8*, i32, %struct.trie**)
+declare i64 @strnlen(i8*, i64)
+
+; Function Attrs: nounwind optnone
+define i32 @mvas_2() {
+ ; VALID64-LABEL: mvas_2:
+ ; VALID64-NOT: cm.mva01s {{a.}}, {{s.}}
+ ; VALID64-NOT: cm.mva01s {{s.}}, {{a.}}
+entry:
+ %trie = alloca %struct.trie*
+ %0 = bitcast %struct.trie** %trie to i8*
+ store %struct.trie* null, %struct.trie** %trie
+ %call = tail call i32 @trie_new(%struct.trie* null)
+ %1 = load i8*, i8** @word
+ %call1 = tail call i64 @strnlen(i8* %1, i64 100)
+ %conv = trunc i64 %call1 to i32
+ %call2 = call i32 @trie_search(i8* %1, i32 %conv, %struct.trie** %trie)
+ ret i32 %call2
+}
+
+declare i32 @foo(i32, i32)
+
+; Function Attrs: nounwind optnone
+define dso_local i32 @cm_mvas_same_src(i32 %0, i32 %1, i32 %2, i32 %3) {
+ ; VALID32-LABEL: cm_mvas_same_src:
+ ; VALID32: cm.mva01s s0, s0
+ ;
+ ; VALID64-LABEL: cm_mvas_same_src:
+ ; VALID64: cm.mva01s s0, s0
+entry:
+ %4 = call i32 @foo(i32 %3, i32 %2)
+ %5 = add i32 %4, %2
+ %6 = call i32 @foo(i32 %3, i32 %3)
+ %add = add i32 %5, %6
+ ret i32 %add
+}
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+%struct.Node = type { i8*, i64, %struct.Node*, %struct.Node* }
+
+declare i8* @malloc(i64)
+
+declare i32 @fgetc(%struct._IO_FILE* nocapture)
+
+declare %struct.Node* @addWordToTree(i8*, %struct.Node*)
+
+; Function Attrs: nounwind optsize
+define %struct.Node* @cmmv_a1_come_first(%struct._IO_FILE* nocapture %file, %struct.Node* %root) {
+; VALID32-LABEL: cm.mvsa01
+; VALID32: cm.mva01s {{s[0-7]}}, {{s[0-7]}}
+
+; VALID64-LABEL: cm.mvsa01
+; VALID64: cm.mva01s {{s[0-7]}}, {{s[0-7]}}
+entry:
+ %call = tail call dereferenceable_or_null(46) i8* @malloc(i64 46)
+ %arrayidx = getelementptr inbounds i8, i8* %call, i64 -1
+ %call117 = tail call i32 @fgetc(%struct._IO_FILE* %file)
+ %sext.mask18 = and i32 %call117, 255
+ %cmp.not19 = icmp eq i32 %sext.mask18, 255
+ br i1 %cmp.not19, label %while.end, label %land.lhs.true.preheader
+
+land.lhs.true.preheader:
+ %arrayidx921 = getelementptr inbounds i8, i8* %call, i64 255
+ store i8 0, i8* %arrayidx921
+ %call1022 = tail call %struct.Node* @addWordToTree(i8* %call, %struct.Node* %root)
+ %call123 = tail call i32 @fgetc(%struct._IO_FILE* %file)
+ %sext.mask24 = and i32 %call123, 255
+ %cmp.not25 = icmp eq i32 %sext.mask24, 255
+ br i1 %cmp.not25, label %while.end, label %land.lhs.true.land.lhs.true_crit_edge
+
+land.lhs.true.land.lhs.true_crit_edge:
+ %call1026 = phi %struct.Node* [ %call10, %land.lhs.true.land.lhs.true_crit_edge ], [ %call1022, %land.lhs.true.preheader ]
+ %.pre = load i8, i8* %arrayidx
+ %cmp6.not = icmp eq i8 %.pre, 39
+ %spec.select = select i1 %cmp6.not, i64 0, i64 255
+ %arrayidx9 = getelementptr inbounds i8, i8* %call, i64 %spec.select
+ store i8 0, i8* %arrayidx9
+ %call10 = tail call %struct.Node* @addWordToTree(i8* %call, %struct.Node* %call1026)
+ %call1 = tail call i32 @fgetc(%struct._IO_FILE* %file)
+ %sext.mask = and i32 %call1, 255
+ %cmp.not = icmp eq i32 %sext.mask, 255
+ br i1 %cmp.not, label %while.end, label %land.lhs.true.land.lhs.true_crit_edge
+
+while.end:
+ %root.addr.0.lcssa = phi %struct.Node* [ %root, %entry ], [ %call1022, %land.lhs.true.preheader ], [ %call10, %land.lhs.true.land.lhs.true_crit_edge ]
+ ret %struct.Node* %root.addr.0.lcssa
+}
diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
@@ -0,0 +1,1815 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Check cm.push/cm.pop.
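+; NOTE: The immediate on cm.push/cm.popret(z) is the total stack adjustment:
+; the pushed register block rounded up to a 16-byte multiple, plus (per the
+; Zcmp spec) at most 48 bytes of additional frame folded into the instruction.
+; Larger frames keep a separate addi, e.g. foo below pairs cm.push {ra}, -64
+; with addi sp, sp, -464 for its 528-byte frame.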
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IZCMP +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64IZCMP +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I %s + +declare void @test(i8*) + +; Function Attrs: optnone +define i32 @foo() { +; RV32IZCMP-LABEL: foo: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra}, -64 +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 528 +; RV32IZCMP-NEXT: addi sp, sp, -464 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: addi a0, sp, 12 +; RV32IZCMP-NEXT: call test@plt +; RV32IZCMP-NEXT: addi sp, sp, 464 +; RV32IZCMP-NEXT: cm.popretz {ra}, 64 +; +; RV64IZCMP-LABEL: foo: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra}, -64 +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 528 +; RV64IZCMP-NEXT: addi sp, sp, -464 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: addi a0, sp, 8 +; RV64IZCMP-NEXT: call test@plt +; RV64IZCMP-NEXT: addi sp, sp, 464 +; RV64IZCMP-NEXT: cm.popretz {ra}, 64 +; +; RV32I-LABEL: foo: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -528 +; RV32I-NEXT: .cfi_def_cfa_offset 528 +; RV32I-NEXT: sw ra, 524(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a0, sp, 12 +; RV32I-NEXT: call test@plt +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: lw ra, 524(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 528 +; RV32I-NEXT: ret +; +; RV64I-LABEL: foo: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -528 +; RV64I-NEXT: .cfi_def_cfa_offset 528 +; RV64I-NEXT: sd ra, 520(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: call test@plt +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: ld ra, 520(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 528 +; RV64I-NEXT: ret + %1 = alloca [512 x i8] + %2 = getelementptr [512 x i8], [512 x i8]* %1, i32 0, i32 0 + call void @test(i8* %2) + ret i32 0 +} + +define dso_local i32 @pushpopret0(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopret0: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popretz {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopret0: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popretz {ra, s0}, 16 +; +; RV32I-LABEL: pushpopret0: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 
12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopret0: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 0 +} + +define dso_local i32 @pushpopret1(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopret1: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: li a0, 1 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopret1: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: li a0, 1 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV32I-LABEL: pushpopret1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopret1: +; RV64I: # %bb.0: # 
%entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 1 +} + +define dso_local i32 @pushpopretneg1(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopretneg1: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: li a0, -1 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopretneg1: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: li a0, -1 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV32I-LABEL: pushpopretneg1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, -1 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopretneg1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, -1 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) 
# 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 -1 +} + +define dso_local i32 @pushpopret2(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopret2: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: li a0, 2 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopret2: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: li a0, 2 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV32I-LABEL: pushpopret2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, 2 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopret2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, 2 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 2 +} + +define dso_local i32 @tailcall(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: tailcall: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 
15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.pop {ra, s0}, 16 +; RV32IZCMP-NEXT: tail callee@plt +; +; RV64IZCMP-LABEL: tailcall: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.pop {ra, s0}, 16 +; RV64IZCMP-NEXT: tail callee@plt +; +; RV32I-LABEL: tailcall: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail callee@plt +; +; RV64I-LABEL: tailcall: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: tail callee@plt +entry: + %0 = alloca i8, i32 %size, align 16 + %1 = tail call i32 @callee(i8* nonnull %0) + ret i32 %1 +} + +@var = global [5 x i32] zeroinitializer +define dso_local i32 @nocompress(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: nocompress: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 48 +; RV32IZCMP-NEXT: cm.push {ra, s0-s8}, -48 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: .cfi_offset s1, -12 +; RV32IZCMP-NEXT: .cfi_offset s2, -16 +; RV32IZCMP-NEXT: .cfi_offset s3, -20 +; RV32IZCMP-NEXT: .cfi_offset s4, -24 +; RV32IZCMP-NEXT: .cfi_offset s5, -28 +; RV32IZCMP-NEXT: .cfi_offset s6, -32 +; RV32IZCMP-NEXT: .cfi_offset s7, -36 +; RV32IZCMP-NEXT: .cfi_offset s8, -40 +; RV32IZCMP-NEXT: addi s0, sp, 48 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub s2, sp, a0 +; RV32IZCMP-NEXT: mv sp, s2 +; RV32IZCMP-NEXT: lui s1, %hi(var) +; RV32IZCMP-NEXT: lw s3, %lo(var)(s1) +; RV32IZCMP-NEXT: lw s4, %lo(var+4)(s1) +; RV32IZCMP-NEXT: lw s5, %lo(var+8)(s1) +; RV32IZCMP-NEXT: lw s6, %lo(var+12)(s1) +; RV32IZCMP-NEXT: addi s7, s1, %lo(var) +; RV32IZCMP-NEXT: lw s8, 16(s7) +; RV32IZCMP-NEXT: mv a0, s2 +; RV32IZCMP-NEXT: call callee_void@plt +; 
RV32IZCMP-NEXT: sw s8, 16(s7) +; RV32IZCMP-NEXT: sw s6, %lo(var+12)(s1) +; RV32IZCMP-NEXT: sw s5, %lo(var+8)(s1) +; RV32IZCMP-NEXT: sw s4, %lo(var+4)(s1) +; RV32IZCMP-NEXT: sw s3, %lo(var)(s1) +; RV32IZCMP-NEXT: mv a0, s2 +; RV32IZCMP-NEXT: addi sp, s0, -48 +; RV32IZCMP-NEXT: cm.pop {ra, s0-s8}, 48 +; RV32IZCMP-NEXT: tail callee@plt +; +; RV64IZCMP-LABEL: nocompress: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 80 +; RV64IZCMP-NEXT: cm.push {ra, s0-s8}, -80 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: .cfi_offset s1, -24 +; RV64IZCMP-NEXT: .cfi_offset s2, -32 +; RV64IZCMP-NEXT: .cfi_offset s3, -40 +; RV64IZCMP-NEXT: .cfi_offset s4, -48 +; RV64IZCMP-NEXT: .cfi_offset s5, -56 +; RV64IZCMP-NEXT: .cfi_offset s6, -64 +; RV64IZCMP-NEXT: .cfi_offset s7, -72 +; RV64IZCMP-NEXT: .cfi_offset s8, -80 +; RV64IZCMP-NEXT: addi s0, sp, 80 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub s2, sp, a0 +; RV64IZCMP-NEXT: mv sp, s2 +; RV64IZCMP-NEXT: lui s1, %hi(var) +; RV64IZCMP-NEXT: lw s3, %lo(var)(s1) +; RV64IZCMP-NEXT: lw s4, %lo(var+4)(s1) +; RV64IZCMP-NEXT: lw s5, %lo(var+8)(s1) +; RV64IZCMP-NEXT: lw s6, %lo(var+12)(s1) +; RV64IZCMP-NEXT: addi s7, s1, %lo(var) +; RV64IZCMP-NEXT: lw s8, 16(s7) +; RV64IZCMP-NEXT: mv a0, s2 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: sw s8, 16(s7) +; RV64IZCMP-NEXT: sw s6, %lo(var+12)(s1) +; RV64IZCMP-NEXT: sw s5, %lo(var+8)(s1) +; RV64IZCMP-NEXT: sw s4, %lo(var+4)(s1) +; RV64IZCMP-NEXT: sw s3, %lo(var)(s1) +; RV64IZCMP-NEXT: mv a0, s2 +; RV64IZCMP-NEXT: addi sp, s0, -80 +; RV64IZCMP-NEXT: cm.pop {ra, s0-s8}, 80 +; RV64IZCMP-NEXT: tail callee@plt +; +; RV32I-LABEL: nocompress: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: .cfi_offset s6, -32 +; RV32I-NEXT: .cfi_offset s7, -36 +; RV32I-NEXT: .cfi_offset s8, -40 +; RV32I-NEXT: addi s0, sp, 48 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub s1, sp, a0 +; RV32I-NEXT: mv sp, s1 +; RV32I-NEXT: lui s2, %hi(var) +; RV32I-NEXT: lw s3, %lo(var)(s2) +; RV32I-NEXT: lw s4, %lo(var+4)(s2) +; RV32I-NEXT: lw s5, %lo(var+8)(s2) +; RV32I-NEXT: lw s6, %lo(var+12)(s2) +; RV32I-NEXT: addi s7, s2, %lo(var) +; RV32I-NEXT: lw s8, 16(s7) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: sw s8, 16(s7) +; RV32I-NEXT: sw s6, %lo(var+12)(s2) +; RV32I-NEXT: sw s5, %lo(var+8)(s2) +; RV32I-NEXT: sw s4, %lo(var+4)(s2) +; RV32I-NEXT: sw s3, %lo(var)(s2) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: addi sp, s0, -48 +; 
RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: tail callee@plt +; +; RV64I-LABEL: nocompress: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -80 +; RV64I-NEXT: .cfi_def_cfa_offset 80 +; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: .cfi_offset s4, -48 +; RV64I-NEXT: .cfi_offset s5, -56 +; RV64I-NEXT: .cfi_offset s6, -64 +; RV64I-NEXT: .cfi_offset s7, -72 +; RV64I-NEXT: .cfi_offset s8, -80 +; RV64I-NEXT: addi s0, sp, 80 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub s1, sp, a0 +; RV64I-NEXT: mv sp, s1 +; RV64I-NEXT: lui s2, %hi(var) +; RV64I-NEXT: lw s3, %lo(var)(s2) +; RV64I-NEXT: lw s4, %lo(var+4)(s2) +; RV64I-NEXT: lw s5, %lo(var+8)(s2) +; RV64I-NEXT: lw s6, %lo(var+12)(s2) +; RV64I-NEXT: addi s7, s2, %lo(var) +; RV64I-NEXT: lw s8, 16(s7) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: sw s8, 16(s7) +; RV64I-NEXT: sw s6, %lo(var+12)(s2) +; RV64I-NEXT: sw s5, %lo(var+8)(s2) +; RV64I-NEXT: sw s4, %lo(var+4)(s2) +; RV64I-NEXT: sw s3, %lo(var)(s2) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: addi sp, s0, -80 +; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 80 +; RV64I-NEXT: tail callee@plt +entry: + %0 = alloca i8, i32 %size, align 16 + %val = load [5 x i32], [5 x i32]* @var + call void @callee_void(i8* nonnull %0) + store volatile [5 x i32] %val, [5 x i32]* @var + %1 = tail call i32 @callee(i8* nonnull %0) + ret i32 %1 +} + +declare void @callee_void(i8*) +declare i32 @callee(i8*) + +declare i32 @foo_test_irq(...) 
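+
+; NOTE: In the "interrupt" handler tests below, cm.push/cm.pop only save and
+; restore ra and s0-s11; the caller-saved registers that are live across the
+; call (t0-t6, a0-a7) are still spilled and reloaded with plain sw/sd between
+; the cm.push and cm.pop.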
+@var_test_irq = global [32 x i32] zeroinitializer
+
+define void @foo_with_irq() nounwind "interrupt"="user" {
+; RV32IZCMP-LABEL: foo_with_irq:
+; RV32IZCMP: # %bb.0:
+; RV32IZCMP-NEXT: cm.push {ra}, -64
+; RV32IZCMP-NEXT: sw t0, 56(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t1, 52(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t2, 48(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a4, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a5, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a6, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a7, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t3, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t4, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t5, 4(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t6, 0(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: call foo_test_irq@plt
+; RV32IZCMP-NEXT: lw t6, 0(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t5, 4(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t4, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t3, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a7, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a4, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t2, 48(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t1, 52(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t0, 56(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: cm.pop {ra}, 64
+; RV32IZCMP-NEXT: uret
+;
+; RV64IZCMP-LABEL: foo_with_irq:
+; RV64IZCMP: # %bb.0:
+; RV64IZCMP-NEXT: cm.push {ra}, -64
+; RV64IZCMP-NEXT: addi sp, sp, -64
+; RV64IZCMP-NEXT: sd t0, 116(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t1, 108(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t2, 100(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a0, 92(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a1, 84(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a2, 76(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a3, 68(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a4, 60(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a5, 52(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a6, 44(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a7, 36(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t3, 28(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t4, 20(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t5, 12(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t6, 4(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: call foo_test_irq@plt
+; RV64IZCMP-NEXT: ld t6, 4(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t5, 12(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t4, 20(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t3, 28(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a7, 36(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a6, 44(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a5, 52(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a4, 60(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a3, 68(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a2, 76(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a1, 84(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a0, 92(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t2, 100(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t1, 108(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t0, 116(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: addi sp, sp, 64
+; RV64IZCMP-NEXT: cm.pop {ra}, 64
+; RV64IZCMP-NEXT: uret
+;
+; RV32I-LABEL: foo_with_irq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -64
+; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t1, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t2, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a4, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a6, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a7, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t6, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call foo_test_irq@plt
+; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t1, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t2, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a7, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 64
+; RV32I-NEXT: uret
+;
+; RV64I-LABEL: foo_with_irq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -128
+; RV64I-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t0, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t1, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t2, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a0, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a2, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a3, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a4, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a5, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a6, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a7, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t3, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t4, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t5, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t6, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call foo_test_irq@plt
+; RV64I-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t0, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t1, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t2, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a0, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a3, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a4, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a6, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a7, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t3, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t4, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t5, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t6, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 128
+; RV64I-NEXT: uret
+  %call = call i32 bitcast (i32 (...)* @foo_test_irq to i32 ()*)()
+  ret void
+}
+
+define void @foo_no_irq() nounwind {
+; RV32IZCMP-LABEL: foo_no_irq:
+; RV32IZCMP: # %bb.0:
+; RV32IZCMP-NEXT: cm.push {ra}, -16
+; RV32IZCMP-NEXT: call foo_test_irq@plt
+; RV32IZCMP-NEXT: cm.popret {ra}, 16
+;
+; RV64IZCMP-LABEL: foo_no_irq:
+; RV64IZCMP: # %bb.0:
+; RV64IZCMP-NEXT: cm.push {ra}, -16
+; RV64IZCMP-NEXT: call foo_test_irq@plt
+; RV64IZCMP-NEXT: cm.popret {ra}, 16
+;
+; RV32I-LABEL: foo_no_irq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call foo_test_irq@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: foo_no_irq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call foo_test_irq@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+  %call = call i32 bitcast (i32 (...)* @foo_test_irq to i32 ()*)()
+  ret void
+}
+
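+; callee_with_irq reads and writes all of @var_test_irq, so in addition to
+; cm.push {ra, s0-s11} the handler needs a separate sp adjustment to make
+; room for the t-/a-register spills required by the interrupt attribute.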
+define void @callee_with_irq() nounwind "interrupt"="user" {
+; RV32IZCMP-LABEL: callee_with_irq:
+; RV32IZCMP: # %bb.0:
+; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -112
+; RV32IZCMP-NEXT: addi sp, sp, -32
+; RV32IZCMP-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t1, 84(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t2, 80(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a1, 72(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a3, 64(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a4, 60(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a5, 56(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a6, 52(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw a7, 48(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t3, 44(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t4, 40(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t5, 36(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: sw t6, 32(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lui a7, %hi(var_test_irq)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7)
+; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7)
+; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7)
+; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7)
+; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: addi a5, a7, %lo(var_test_irq)
+; RV32IZCMP-NEXT: lw a0, 16(a5)
+; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw a0, 20(a5)
+; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw t4, 24(a5)
+; RV32IZCMP-NEXT: lw t5, 28(a5)
+; RV32IZCMP-NEXT: lw t6, 32(a5)
+; RV32IZCMP-NEXT: lw s2, 36(a5)
+; RV32IZCMP-NEXT: lw s3, 40(a5)
+; RV32IZCMP-NEXT: lw s4, 44(a5)
+; RV32IZCMP-NEXT: lw s5, 48(a5)
+; RV32IZCMP-NEXT: lw s6, 52(a5)
+; RV32IZCMP-NEXT: lw s7, 56(a5)
+; RV32IZCMP-NEXT: lw s8, 60(a5)
+; RV32IZCMP-NEXT: lw s9, 64(a5)
+; RV32IZCMP-NEXT: lw s10, 68(a5)
+; RV32IZCMP-NEXT: lw s11, 72(a5)
+; RV32IZCMP-NEXT: lw ra, 76(a5)
+; RV32IZCMP-NEXT: lw s1, 80(a5)
+; RV32IZCMP-NEXT: lw t3, 84(a5)
+; RV32IZCMP-NEXT: lw t2, 88(a5)
+; RV32IZCMP-NEXT: lw t1, 92(a5)
+; RV32IZCMP-NEXT: lw t0, 96(a5)
+; RV32IZCMP-NEXT: lw s0, 100(a5)
+; RV32IZCMP-NEXT: lw a6, 104(a5)
+; RV32IZCMP-NEXT: lw a4, 108(a5)
+; RV32IZCMP-NEXT: lw a0, 124(a5)
+; RV32IZCMP-NEXT: lw a1, 120(a5)
+; RV32IZCMP-NEXT: lw a2, 116(a5)
+; RV32IZCMP-NEXT: lw a3, 112(a5)
+; RV32IZCMP-NEXT: sw a0, 124(a5)
+; RV32IZCMP-NEXT: sw a1, 120(a5)
+; RV32IZCMP-NEXT: sw a2, 116(a5)
+; RV32IZCMP-NEXT: sw a3, 112(a5)
+; RV32IZCMP-NEXT: sw a4, 108(a5)
+; RV32IZCMP-NEXT: sw a6, 104(a5)
+; RV32IZCMP-NEXT: sw s0, 100(a5)
+; RV32IZCMP-NEXT: sw t0, 96(a5)
+; RV32IZCMP-NEXT: sw t1, 92(a5)
+; RV32IZCMP-NEXT: sw t2, 88(a5)
+; RV32IZCMP-NEXT: sw t3, 84(a5)
+; RV32IZCMP-NEXT: sw s1, 80(a5)
+; RV32IZCMP-NEXT: sw ra, 76(a5)
+; RV32IZCMP-NEXT: sw s11, 72(a5)
+; RV32IZCMP-NEXT: sw s10, 68(a5)
+; RV32IZCMP-NEXT: sw s9, 64(a5)
+; RV32IZCMP-NEXT: sw s8, 60(a5)
+; RV32IZCMP-NEXT: sw s7, 56(a5)
+; RV32IZCMP-NEXT: sw s6, 52(a5)
+; RV32IZCMP-NEXT: sw s5, 48(a5)
+; RV32IZCMP-NEXT: sw s4, 44(a5)
+; RV32IZCMP-NEXT: sw s3, 40(a5)
+; RV32IZCMP-NEXT: sw s2, 36(a5)
+; RV32IZCMP-NEXT: sw t6, 32(a5)
+; RV32IZCMP-NEXT: sw t5, 28(a5)
+; RV32IZCMP-NEXT: sw t4, 24(a5)
+; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, 20(a5)
+; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, 16(a5)
+; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7)
+; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7)
+; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7)
+; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7)
+; RV32IZCMP-NEXT: lw t6, 32(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t5, 36(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t4, 40(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t3, 44(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a7, 48(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a6, 52(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a5, 56(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a4, 60(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a3, 64(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a2, 68(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t2, 80(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t1, 84(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: lw t0, 88(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: addi sp, sp, 32
+; RV32IZCMP-NEXT: cm.pop {ra, s0-s11}, 112
+; RV32IZCMP-NEXT: uret
+;
+; RV64IZCMP-LABEL: callee_with_irq:
+; RV64IZCMP: # %bb.0:
+; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160
+; RV64IZCMP-NEXT: addi sp, sp, -112
+; RV64IZCMP-NEXT: sd t0, 212(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t1, 204(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t2, 196(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a0, 188(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a1, 180(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a2, 172(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a3, 164(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a4, 156(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a5, 148(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a6, 140(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd a7, 132(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t3, 124(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t4, 116(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t5, 108(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: sd t6, 100(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lui a7, %hi(var_test_irq)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7)
+; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7)
+; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7)
+; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7)
+; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: addi a5, a7, %lo(var_test_irq)
+; RV64IZCMP-NEXT: lw a0, 16(a5)
+; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw a0, 20(a5)
+; RV64IZCMP-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw t4, 24(a5)
+; RV64IZCMP-NEXT: lw t5, 28(a5)
+; RV64IZCMP-NEXT: lw t6, 32(a5)
+; RV64IZCMP-NEXT: lw s2, 36(a5)
+; RV64IZCMP-NEXT: lw s3, 40(a5)
+; RV64IZCMP-NEXT: lw s4, 44(a5)
+; RV64IZCMP-NEXT: lw s5, 48(a5)
+; RV64IZCMP-NEXT: lw s6, 52(a5)
+; RV64IZCMP-NEXT: lw s7, 56(a5)
+; RV64IZCMP-NEXT: lw s8, 60(a5)
+; RV64IZCMP-NEXT: lw s9, 64(a5)
+; RV64IZCMP-NEXT: lw s10, 68(a5)
+; RV64IZCMP-NEXT: lw s11, 72(a5)
+; RV64IZCMP-NEXT: lw ra, 76(a5)
+; RV64IZCMP-NEXT: lw s1, 80(a5)
+; RV64IZCMP-NEXT: lw t3, 84(a5)
+; RV64IZCMP-NEXT: lw t2, 88(a5)
+; RV64IZCMP-NEXT: lw t1, 92(a5)
+; RV64IZCMP-NEXT: lw t0, 96(a5)
+; RV64IZCMP-NEXT: lw s0, 100(a5)
+; RV64IZCMP-NEXT: lw a6, 104(a5)
+; RV64IZCMP-NEXT: lw a4, 108(a5)
+; RV64IZCMP-NEXT: lw a0, 124(a5)
+; RV64IZCMP-NEXT: lw a1, 120(a5)
+; RV64IZCMP-NEXT: lw a2, 116(a5)
+; RV64IZCMP-NEXT: lw a3, 112(a5)
+; RV64IZCMP-NEXT: sw a0, 124(a5)
+; RV64IZCMP-NEXT: sw a1, 120(a5)
+; RV64IZCMP-NEXT: sw a2, 116(a5)
+; RV64IZCMP-NEXT: sw a3, 112(a5)
+; RV64IZCMP-NEXT: sw a4, 108(a5)
+; RV64IZCMP-NEXT: sw a6, 104(a5)
+; RV64IZCMP-NEXT: sw s0, 100(a5)
+; RV64IZCMP-NEXT: sw t0, 96(a5)
+; RV64IZCMP-NEXT: sw t1, 92(a5)
+; RV64IZCMP-NEXT: sw t2, 88(a5)
+; RV64IZCMP-NEXT: sw t3, 84(a5)
+; RV64IZCMP-NEXT: sw s1, 80(a5)
+; RV64IZCMP-NEXT: sw ra, 76(a5)
+; RV64IZCMP-NEXT: sw s11, 72(a5)
+; RV64IZCMP-NEXT: sw s10, 68(a5)
+; RV64IZCMP-NEXT: sw s9, 64(a5)
+; RV64IZCMP-NEXT: sw s8, 60(a5)
+; RV64IZCMP-NEXT: sw s7, 56(a5)
+; RV64IZCMP-NEXT: sw s6, 52(a5)
+; RV64IZCMP-NEXT: sw s5, 48(a5)
+; RV64IZCMP-NEXT: sw s4, 44(a5)
+; RV64IZCMP-NEXT: sw s3, 40(a5)
+; RV64IZCMP-NEXT: sw s2, 36(a5)
+; RV64IZCMP-NEXT: sw t6, 32(a5)
+; RV64IZCMP-NEXT: sw t5, 28(a5)
+; RV64IZCMP-NEXT: sw t4, 24(a5)
+; RV64IZCMP-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, 20(a5)
+; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, 16(a5)
+; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7)
+; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7)
+; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7)
+; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7)
+; RV64IZCMP-NEXT: ld t6, 100(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t5, 108(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t4, 116(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t3, 124(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a7, 132(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a6, 140(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a5, 148(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a4, 156(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a3, 164(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a2, 172(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a1, 180(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld a0, 188(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t2, 196(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t1, 204(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: ld t0, 212(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: addi sp, sp, 112
+; RV64IZCMP-NEXT: cm.pop {ra, s0-s11}, 160
+; RV64IZCMP-NEXT: uret
+;
+; RV32I-LABEL: callee_with_irq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -144
+; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t0, 136(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t1, 132(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t2, 128(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 124(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 120(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a0, 116(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a2, 108(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a3, 104(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a4, 100(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a5, 96(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a6, 92(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw a7, 88(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 84(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t3, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t4, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t5, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw t6, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a7, %hi(var_test_irq)
+; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7)
+; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7)
+; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7)
+; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7)
+; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi a5, a7, %lo(var_test_irq)
+; RV32I-NEXT: lw a0, 16(a5)
+; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 20(a5)
+; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 24(a5)
+; RV32I-NEXT: lw t1, 28(a5)
+; RV32I-NEXT: lw t2, 32(a5)
+; RV32I-NEXT: lw t3, 36(a5)
+; RV32I-NEXT: lw t4, 40(a5)
+; RV32I-NEXT: lw t5, 44(a5)
+; RV32I-NEXT: lw t6, 48(a5)
+; RV32I-NEXT: lw s0, 52(a5)
+; RV32I-NEXT: lw s1, 56(a5)
+; RV32I-NEXT: lw s2, 60(a5)
+; RV32I-NEXT: lw s3, 64(a5)
+; RV32I-NEXT: lw s4, 68(a5)
+; RV32I-NEXT: lw s5, 72(a5)
+; RV32I-NEXT: lw s6, 76(a5)
+; RV32I-NEXT: lw s7, 80(a5)
+; RV32I-NEXT: lw s8, 84(a5)
+; RV32I-NEXT: lw s9, 88(a5)
+; RV32I-NEXT: lw s10, 92(a5)
+; RV32I-NEXT: lw s11, 96(a5)
+; RV32I-NEXT: lw ra, 100(a5)
+; RV32I-NEXT: lw a6, 104(a5)
+; RV32I-NEXT: lw a4, 108(a5)
+; RV32I-NEXT: lw a0, 124(a5)
+; RV32I-NEXT: lw a1, 120(a5)
+; RV32I-NEXT: lw a2, 116(a5)
+; RV32I-NEXT: lw a3, 112(a5)
+; RV32I-NEXT: sw a0, 124(a5)
+; RV32I-NEXT: sw a1, 120(a5)
+; RV32I-NEXT: sw a2, 116(a5)
+; RV32I-NEXT: sw a3, 112(a5)
+; RV32I-NEXT: sw a4, 108(a5)
+; RV32I-NEXT: sw a6, 104(a5)
+; RV32I-NEXT: sw ra, 100(a5)
+; RV32I-NEXT: sw s11, 96(a5)
+; RV32I-NEXT: sw s10, 92(a5)
+; RV32I-NEXT: sw s9, 88(a5)
+; RV32I-NEXT: sw s8, 84(a5)
+; RV32I-NEXT: sw s7, 80(a5)
+; RV32I-NEXT: sw s6, 76(a5)
+; RV32I-NEXT: sw s5, 72(a5)
+; RV32I-NEXT: sw s4, 68(a5)
+; RV32I-NEXT: sw s3, 64(a5)
+; RV32I-NEXT: sw s2, 60(a5)
+; RV32I-NEXT: sw s1, 56(a5)
+; RV32I-NEXT: sw s0, 52(a5)
+; RV32I-NEXT: sw t6, 48(a5)
+; RV32I-NEXT: sw t5, 44(a5)
+; RV32I-NEXT: sw t4, 40(a5)
+; RV32I-NEXT: sw t3, 36(a5)
+; RV32I-NEXT: sw t2, 32(a5)
+; RV32I-NEXT: sw t1, 28(a5)
+; RV32I-NEXT: sw t0, 24(a5)
+; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, 20(a5)
+; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, 16(a5)
+; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7)
+; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7)
+; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7)
+; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7)
+; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t0, 136(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t1, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t2, 128(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 120(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a0, 116(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a2, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a3, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a4, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a5, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a6, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw a7, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 84(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t3, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t4, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t5, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw t6, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 144
+; RV32I-NEXT: uret
+;
+; RV64I-LABEL: callee_with_irq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -272
+; RV64I-NEXT: sd ra, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t0, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t1, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t2, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 232(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 224(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a0, 216(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a1, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a3, 192(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a4, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a5, 176(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a6, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd a7, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t3, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t4, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t5, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd t6, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a7, %hi(var_test_irq)
+; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7)
+; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7)
+; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7)
+; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7)
+; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi a5, a7, %lo(var_test_irq)
+; RV64I-NEXT: lw a0, 16(a5)
+; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw a0, 20(a5)
+; RV64I-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw t0, 24(a5)
+; RV64I-NEXT: lw t1, 28(a5)
+; RV64I-NEXT: lw t2, 32(a5)
+; RV64I-NEXT: lw t3, 36(a5)
+; RV64I-NEXT: lw t4, 40(a5)
+; RV64I-NEXT: lw t5, 44(a5)
+; RV64I-NEXT: lw t6, 48(a5)
+; RV64I-NEXT: lw s0, 52(a5)
+; RV64I-NEXT: lw s1, 56(a5)
+; RV64I-NEXT: lw s2, 60(a5)
+; RV64I-NEXT: lw s3, 64(a5)
+; RV64I-NEXT: lw s4, 68(a5)
+; RV64I-NEXT: lw s5, 72(a5)
+; RV64I-NEXT: lw s6, 76(a5)
+; RV64I-NEXT: lw s7, 80(a5)
+; RV64I-NEXT: lw s8, 84(a5)
+; RV64I-NEXT: lw s9, 88(a5)
+; RV64I-NEXT: lw s10, 92(a5)
+; RV64I-NEXT: lw s11, 96(a5)
+; RV64I-NEXT: lw ra, 100(a5)
+; RV64I-NEXT: lw a6, 104(a5)
+; RV64I-NEXT: lw a4, 108(a5)
+; RV64I-NEXT: lw a0, 124(a5)
+; RV64I-NEXT: lw a1, 120(a5)
+; RV64I-NEXT: lw a2, 116(a5)
+; RV64I-NEXT: lw a3, 112(a5)
+; RV64I-NEXT: sw a0, 124(a5)
+; RV64I-NEXT: sw a1, 120(a5)
+; RV64I-NEXT: sw a2, 116(a5)
+; RV64I-NEXT: sw a3, 112(a5)
+; RV64I-NEXT: sw a4, 108(a5)
+; RV64I-NEXT: sw a6, 104(a5)
+; RV64I-NEXT: sw ra, 100(a5)
+; RV64I-NEXT: sw s11, 96(a5)
+; RV64I-NEXT: sw s10, 92(a5)
+; RV64I-NEXT: sw s9, 88(a5)
+; RV64I-NEXT: sw s8, 84(a5)
+; RV64I-NEXT: sw s7, 80(a5)
+; RV64I-NEXT: sw s6, 76(a5)
+; RV64I-NEXT: sw s5, 72(a5)
+; RV64I-NEXT: sw s4, 68(a5)
+; RV64I-NEXT: sw s3, 64(a5)
+; RV64I-NEXT: sw s2, 60(a5)
+; RV64I-NEXT: sw s1, 56(a5)
+; RV64I-NEXT: sw s0, 52(a5)
+; RV64I-NEXT: sw t6, 48(a5)
+; RV64I-NEXT: sw t5, 44(a5)
+; RV64I-NEXT: sw t4, 40(a5)
+; RV64I-NEXT: sw t3, 36(a5)
+; RV64I-NEXT: sw t2, 32(a5)
+; RV64I-NEXT: sw t1, 28(a5)
+; RV64I-NEXT: sw t0, 24(a5)
+; RV64I-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, 20(a5)
+; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, 16(a5)
+; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7)
+; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7)
+; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7)
+; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7)
+; RV64I-NEXT: ld ra, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t0, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t1, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t2, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a0, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a1, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a2, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a3, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a4, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a5, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a6, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld a7, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t3, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t4, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t5, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld t6, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 272
+; RV64I-NEXT: uret
+  %val = load [32 x i32], [32 x i32]* @var_test_irq
+  store volatile [32 x i32] %val, [32 x i32]* @var_test_irq
+  ret void
+}
+
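+; Without the interrupt attribute, callee_no_irq only needs the callee-saved
+; registers, so the whole prologue/epilogue folds into a matching
+; cm.push/cm.popret pair with no extra sp adjustment.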
+define void @callee_no_irq() nounwind {
+; RV32IZCMP-LABEL: callee_no_irq:
+; RV32IZCMP: # %bb.0:
+; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -80
+; RV32IZCMP-NEXT: lui a7, %hi(var_test_irq)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7)
+; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7)
+; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7)
+; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7)
+; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: addi a5, a7, %lo(var_test_irq)
+; RV32IZCMP-NEXT: lw a0, 16(a5)
+; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw a0, 20(a5)
+; RV32IZCMP-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT: lw t4, 24(a5)
+; RV32IZCMP-NEXT: lw t5, 28(a5)
+; RV32IZCMP-NEXT: lw t6, 32(a5)
+; RV32IZCMP-NEXT: lw s2, 36(a5)
+; RV32IZCMP-NEXT: lw s3, 40(a5)
+; RV32IZCMP-NEXT: lw s4, 44(a5)
+; RV32IZCMP-NEXT: lw s5, 48(a5)
+; RV32IZCMP-NEXT: lw s6, 52(a5)
+; RV32IZCMP-NEXT: lw s7, 56(a5)
+; RV32IZCMP-NEXT: lw s8, 60(a5)
+; RV32IZCMP-NEXT: lw s9, 64(a5)
+; RV32IZCMP-NEXT: lw s10, 68(a5)
+; RV32IZCMP-NEXT: lw s11, 72(a5)
+; RV32IZCMP-NEXT: lw ra, 76(a5)
+; RV32IZCMP-NEXT: lw s1, 80(a5)
+; RV32IZCMP-NEXT: lw t3, 84(a5)
+; RV32IZCMP-NEXT: lw t2, 88(a5)
+; RV32IZCMP-NEXT: lw t1, 92(a5)
+; RV32IZCMP-NEXT: lw t0, 96(a5)
+; RV32IZCMP-NEXT: lw s0, 100(a5)
+; RV32IZCMP-NEXT: lw a6, 104(a5)
+; RV32IZCMP-NEXT: lw a4, 108(a5)
+; RV32IZCMP-NEXT: lw a0, 124(a5)
+; RV32IZCMP-NEXT: lw a1, 120(a5)
+; RV32IZCMP-NEXT: lw a2, 116(a5)
+; RV32IZCMP-NEXT: lw a3, 112(a5)
+; RV32IZCMP-NEXT: sw a0, 124(a5)
+; RV32IZCMP-NEXT: sw a1, 120(a5)
+; RV32IZCMP-NEXT: sw a2, 116(a5)
+; RV32IZCMP-NEXT: sw a3, 112(a5)
+; RV32IZCMP-NEXT: sw a4, 108(a5)
+; RV32IZCMP-NEXT: sw a6, 104(a5)
+; RV32IZCMP-NEXT: sw s0, 100(a5)
+; RV32IZCMP-NEXT: sw t0, 96(a5)
+; RV32IZCMP-NEXT: sw t1, 92(a5)
+; RV32IZCMP-NEXT: sw t2, 88(a5)
+; RV32IZCMP-NEXT: sw t3, 84(a5)
+; RV32IZCMP-NEXT: sw s1, 80(a5)
+; RV32IZCMP-NEXT: sw ra, 76(a5)
+; RV32IZCMP-NEXT: sw s11, 72(a5)
+; RV32IZCMP-NEXT: sw s10, 68(a5)
+; RV32IZCMP-NEXT: sw s9, 64(a5)
+; RV32IZCMP-NEXT: sw s8, 60(a5)
+; RV32IZCMP-NEXT: sw s7, 56(a5)
+; RV32IZCMP-NEXT: sw s6, 52(a5)
+; RV32IZCMP-NEXT: sw s5, 48(a5)
+; RV32IZCMP-NEXT: sw s4, 44(a5)
+; RV32IZCMP-NEXT: sw s3, 40(a5)
+; RV32IZCMP-NEXT: sw s2, 36(a5)
+; RV32IZCMP-NEXT: sw t6, 32(a5)
+; RV32IZCMP-NEXT: sw t5, 28(a5)
+; RV32IZCMP-NEXT: sw t4, 24(a5)
+; RV32IZCMP-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, 20(a5)
+; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, 16(a5)
+; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7)
+; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7)
+; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7)
+; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7)
+; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80
+;
+; RV64IZCMP-LABEL: callee_no_irq:
+; RV64IZCMP: # %bb.0:
+; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160
+; RV64IZCMP-NEXT: lui a7, %hi(var_test_irq)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7)
+; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7)
+; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7)
+; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7)
+; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: addi a5, a7, %lo(var_test_irq)
+; RV64IZCMP-NEXT: lw a0, 16(a5)
+; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw a0, 20(a5)
+; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT: lw t4, 24(a5)
+; RV64IZCMP-NEXT: lw t5, 28(a5)
+; RV64IZCMP-NEXT: lw t6, 32(a5)
+; RV64IZCMP-NEXT: lw s2, 36(a5)
+; RV64IZCMP-NEXT: lw s3, 40(a5)
+; RV64IZCMP-NEXT: lw s4, 44(a5)
+; RV64IZCMP-NEXT: lw s5, 48(a5)
+; RV64IZCMP-NEXT: lw s6, 52(a5)
+; RV64IZCMP-NEXT: lw s7, 56(a5)
+; RV64IZCMP-NEXT: lw s8, 60(a5)
+; RV64IZCMP-NEXT: lw s9, 64(a5)
+; RV64IZCMP-NEXT: lw s10, 68(a5)
+; RV64IZCMP-NEXT: lw s11, 72(a5)
+; RV64IZCMP-NEXT: lw ra, 76(a5)
+; RV64IZCMP-NEXT: lw s1, 80(a5)
+; RV64IZCMP-NEXT: lw t3, 84(a5)
+; RV64IZCMP-NEXT: lw t2, 88(a5)
+; RV64IZCMP-NEXT: lw t1, 92(a5)
+; RV64IZCMP-NEXT: lw t0, 96(a5)
+; RV64IZCMP-NEXT: lw s0, 100(a5)
+; RV64IZCMP-NEXT: lw a6, 104(a5)
+; RV64IZCMP-NEXT: lw a4, 108(a5)
+; RV64IZCMP-NEXT: lw a0, 124(a5)
+; RV64IZCMP-NEXT: lw a1, 120(a5)
+; RV64IZCMP-NEXT: lw a2, 116(a5)
+; RV64IZCMP-NEXT: lw a3, 112(a5)
+; RV64IZCMP-NEXT: sw a0, 124(a5)
+; RV64IZCMP-NEXT: sw a1, 120(a5)
+; RV64IZCMP-NEXT: sw a2, 116(a5)
+; RV64IZCMP-NEXT: sw a3, 112(a5)
+; RV64IZCMP-NEXT: sw a4, 108(a5)
+; RV64IZCMP-NEXT: sw a6, 104(a5)
+; RV64IZCMP-NEXT: sw s0, 100(a5)
+; RV64IZCMP-NEXT: sw t0, 96(a5)
+; RV64IZCMP-NEXT: sw t1, 92(a5)
+; RV64IZCMP-NEXT: sw t2, 88(a5)
+; RV64IZCMP-NEXT: sw t3, 84(a5)
+; RV64IZCMP-NEXT: sw s1, 80(a5)
+; RV64IZCMP-NEXT: sw ra, 76(a5)
+; RV64IZCMP-NEXT: sw s11, 72(a5)
+; RV64IZCMP-NEXT: sw s10, 68(a5)
+; RV64IZCMP-NEXT: sw s9, 64(a5)
+; RV64IZCMP-NEXT: sw s8, 60(a5)
+; RV64IZCMP-NEXT: sw s7, 56(a5)
+; RV64IZCMP-NEXT: sw s6, 52(a5)
+; RV64IZCMP-NEXT: sw s5, 48(a5)
+; RV64IZCMP-NEXT: sw s4, 44(a5)
+; RV64IZCMP-NEXT: sw s3, 40(a5)
+; RV64IZCMP-NEXT: sw s2, 36(a5)
+; RV64IZCMP-NEXT: sw t6, 32(a5)
+; RV64IZCMP-NEXT: sw t5, 28(a5)
+; RV64IZCMP-NEXT: sw t4, 24(a5)
+; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, 20(a5)
+; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, 16(a5)
+; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7)
+; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7)
+; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7)
+; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7)
+; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160
+;
+; RV32I-LABEL: callee_no_irq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -80
+; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lui a7, %hi(var_test_irq)
+; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7)
+; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7)
+; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7)
+; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7)
+; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi a5, a7, %lo(var_test_irq)
+; RV32I-NEXT: lw a0, 16(a5)
+; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw a0, 20(a5)
+; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lw t0, 24(a5)
+; RV32I-NEXT: lw t1, 28(a5)
+; RV32I-NEXT: lw t2, 32(a5)
+; RV32I-NEXT: lw t3, 36(a5)
+; RV32I-NEXT: lw t4, 40(a5)
+; RV32I-NEXT: lw t5, 44(a5)
+; RV32I-NEXT: lw t6, 48(a5)
+; RV32I-NEXT: lw s0, 52(a5)
+; RV32I-NEXT: lw s1, 56(a5)
+; RV32I-NEXT: lw s2, 60(a5)
+; RV32I-NEXT: lw s3, 64(a5)
+; RV32I-NEXT: lw s4, 68(a5)
+; RV32I-NEXT: lw s5, 72(a5)
+; RV32I-NEXT: lw s6, 76(a5)
+; RV32I-NEXT: lw s7, 80(a5)
+; RV32I-NEXT: lw s8, 84(a5)
+; RV32I-NEXT: lw s9, 88(a5)
+; RV32I-NEXT: lw s10, 92(a5)
+; RV32I-NEXT: lw s11, 96(a5)
+; RV32I-NEXT: lw ra, 100(a5)
+; RV32I-NEXT: lw a6, 104(a5)
+; RV32I-NEXT: lw a4, 108(a5)
+; RV32I-NEXT: lw a0, 124(a5)
+; RV32I-NEXT: lw a1, 120(a5)
+; RV32I-NEXT: lw a2, 116(a5)
+; RV32I-NEXT: lw a3, 112(a5)
+; RV32I-NEXT: sw a0, 124(a5)
+; RV32I-NEXT: sw a1, 120(a5)
+; RV32I-NEXT: sw a2, 116(a5)
+; RV32I-NEXT: sw a3, 112(a5)
+; RV32I-NEXT: sw a4, 108(a5)
+; RV32I-NEXT: sw a6, 104(a5)
+; RV32I-NEXT: sw ra, 100(a5)
+; RV32I-NEXT: sw s11, 96(a5)
+; RV32I-NEXT: sw s10, 92(a5)
+; RV32I-NEXT: sw s9, 88(a5)
+; RV32I-NEXT: sw s8, 84(a5)
+; RV32I-NEXT: sw s7, 80(a5)
+; RV32I-NEXT: sw s6, 76(a5)
+; RV32I-NEXT: sw s5, 72(a5)
+; RV32I-NEXT: sw s4, 68(a5)
+; RV32I-NEXT: sw s3, 64(a5)
+; RV32I-NEXT: sw s2, 60(a5)
+; RV32I-NEXT: sw s1, 56(a5)
+; RV32I-NEXT: sw s0, 52(a5)
+; RV32I-NEXT: sw t6, 48(a5)
+; RV32I-NEXT: sw t5, 44(a5)
+; RV32I-NEXT: sw t4, 40(a5)
+; RV32I-NEXT: sw t3, 36(a5)
+; RV32I-NEXT: sw t2, 32(a5)
+; RV32I-NEXT: sw t1, 28(a5)
+; RV32I-NEXT: sw t0, 24(a5)
+; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, 20(a5)
+; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, 16(a5)
+; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7)
+; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7)
+; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7)
+; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7)
+; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 80
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: callee_no_irq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd ra, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lui a7, %hi(var_test_irq)
+; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7)
+; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7)
+; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7)
+; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7)
+; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi a5, a7, %lo(var_test_irq)
+; RV64I-NEXT: lw a0, 16(a5)
+; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw a0, 20(a5)
+; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: lw t0, 24(a5)
+; RV64I-NEXT: lw t1, 28(a5)
+; RV64I-NEXT: lw t2, 32(a5)
+; RV64I-NEXT: lw t3, 36(a5)
+; RV64I-NEXT: lw t4, 40(a5)
+; RV64I-NEXT: lw t5, 44(a5)
+; RV64I-NEXT: lw t6, 48(a5)
+; RV64I-NEXT: lw s0, 52(a5)
+; RV64I-NEXT: lw s1, 56(a5)
+; RV64I-NEXT: lw s2, 60(a5)
+; RV64I-NEXT: lw s3, 64(a5)
+; RV64I-NEXT: lw s4, 68(a5)
+; RV64I-NEXT: lw s5, 72(a5)
+; RV64I-NEXT: lw s6, 76(a5)
+; RV64I-NEXT: lw s7, 80(a5)
+; RV64I-NEXT: lw s8, 84(a5)
+; RV64I-NEXT: lw s9, 88(a5)
+; RV64I-NEXT: lw s10, 92(a5)
+; RV64I-NEXT: lw s11, 96(a5)
+; RV64I-NEXT: lw ra, 100(a5)
+; RV64I-NEXT: lw a6, 104(a5)
+; RV64I-NEXT: lw a4, 108(a5)
+; RV64I-NEXT: lw a0, 124(a5)
+; RV64I-NEXT: lw a1, 120(a5)
+; RV64I-NEXT: lw a2, 116(a5)
+; RV64I-NEXT: lw a3, 112(a5)
+; RV64I-NEXT: sw a0, 124(a5)
+; RV64I-NEXT: sw a1, 120(a5)
+; RV64I-NEXT: sw a2, 116(a5)
+; RV64I-NEXT: sw a3, 112(a5)
+; RV64I-NEXT: sw a4, 108(a5)
+; RV64I-NEXT: sw a6, 104(a5)
+; RV64I-NEXT: sw ra, 100(a5)
+; RV64I-NEXT: sw s11, 96(a5)
+; RV64I-NEXT: sw s10, 92(a5)
+; RV64I-NEXT: sw s9, 88(a5)
+; RV64I-NEXT: sw s8, 84(a5)
+; RV64I-NEXT: sw s7, 80(a5)
+; RV64I-NEXT: sw s6, 76(a5)
+; RV64I-NEXT: sw s5, 72(a5)
+; RV64I-NEXT: sw s4, 68(a5)
+; RV64I-NEXT: sw s3, 64(a5)
+; RV64I-NEXT: sw s2, 60(a5)
+; RV64I-NEXT: sw s1, 56(a5)
+; RV64I-NEXT: sw s0, 52(a5)
+; RV64I-NEXT: sw t6, 48(a5)
+; RV64I-NEXT: sw t5, 44(a5)
+; RV64I-NEXT: sw t4, 40(a5)
+; RV64I-NEXT: sw t3, 36(a5)
+; RV64I-NEXT: sw t2, 32(a5)
+; RV64I-NEXT: sw t1, 28(a5)
+; RV64I-NEXT: sw t0, 24(a5)
+; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, 20(a5)
+; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, 16(a5)
+; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7)
+; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7)
+; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7)
+; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7)
+; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
+; RV64I-NEXT: ret
+  %val = load [32 x i32], [32 x i32]* @var_test_irq
+  store volatile [32 x i32] %val, [32 x i32]* @var_test_irq
+  ret void
+}