diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -37,6 +37,8 @@
   RISCVMacroFusion.cpp
   RISCVMCInstLower.cpp
   RISCVMergeBaseOffset.cpp
+  RISCVMoveOptimizer.cpp
+  RISCVPushPopOptimizer.cpp
   RISCVRedundantCopyElimination.cpp
   RISCVRegisterBankInfo.cpp
   RISCVRegisterInfo.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -68,6 +68,12 @@
 FunctionPass *createRISCVRedundantCopyEliminationPass();
 void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
 
+FunctionPass *createRISCVMoveOptimizationPass();
+void initializeRISCVMoveOptPass(PassRegistry &);
+
+FunctionPass *createRISCVPushPopOptimizationPass();
+void initializeRISCVPushPopOptPass(PassRegistry &);
+
 InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
                                                     RISCVSubtarget &,
                                                     RISCVRegisterBankInfo &);
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -46,6 +46,8 @@
 
   bool hasBP(const MachineFunction &MF) const;
 
+  bool isCSIpushable(const std::vector<CalleeSavedInfo> &CSI) const;
+
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -222,6 +222,97 @@
   return RestoreLibCalls[LibCallID];
 }
 
+// Maps the highest-numbered register handled by a PUSH/POP to the encoded
+// register-list value carried by the instruction.
+static int getPushPopEncoding(const Register MaxReg) {
+  switch (MaxReg) {
+  case /*ra*/ RISCV::X1:
+    return 4;
+  case /*s0*/ RISCV::X8:
+    return 5;
+  case /*s1*/ RISCV::X9:
+    return 6;
+  case /*s2*/ RISCV::X18:
+    return 7;
+  case /*s3*/ RISCV::X19:
+    return 8;
+  case /*s4*/ RISCV::X20:
+    return 9;
+  case /*s5*/ RISCV::X21:
+    return 10;
+  case /*s6*/ RISCV::X22:
+    return 11;
+  case /*s7*/ RISCV::X23:
+    return 12;
+  case /*s8*/ RISCV::X24:
+    return 13;
+  case /*s9*/ RISCV::X25:
+    return 14;
+  case /*s11*/ RISCV::X27:
+    return 15;
+  default:
+    llvm_unreachable("Something has gone wrong!");
+  }
+}
+
+// Re-lay out non-PUSH callee-saved slots directly below the CM.PUSH area.
+void reallocPushStackFream(MachineFunction &MF) {
+  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  // Keep the running offset signed: a size_t would wrap and, where size_t is
+  // 32 bits wide, reach setObjectOffset as a large positive value.
+  int64_t NonePushStackOffset =
+      -static_cast<int64_t>(RVFI->getRVPushStackSize());
+  for (const CalleeSavedInfo &Entry : MFI.getCalleeSavedInfo()) {
+    Register Reg = Entry.getReg();
+    if (Reg == RISCV::X26 || RISCV::PGPRRegClass.contains(Reg))
+      continue;
+    NonePushStackOffset -= MFI.getObjectSize(Entry.getFrameIdx());
+    MFI.setObjectOffset(Entry.getFrameIdx(), NonePushStackOffset);
+  }
+}
+
+// Folds as much of StackAdj as possible into the stack-adjustment immediate
+// (last operand) of the PUSH/POP at MBBI, flags the instruction as
+// FrameDestroy (isPop) or FrameSetup, and returns the bytes left over.
+static uint64_t adjSPInPushPop(MachineBasicBlock::iterator MBBI,
+                               uint64_t StackAdj, bool isPop) {
+  // The spec allocates 2 bits to specify number of extra 16 byte blocks.
+  const uint32_t MaxExtraAdj = 48;
+  // The value already stored in the immediate is taken off the request
+  // before the extra amount is chosen.
+  auto &SpImmOp = MBBI->getOperand(MBBI->getNumOperands() - 1);
+  int BaseAdj = SpImmOp.getImm();
+  uint64_t Remaining = StackAdj - BaseAdj;
+  uint64_t Folded;
+  if (Remaining < MaxExtraAdj) {
+    // Round up to a whole number of 16 byte blocks; nothing is left over.
+    Folded = alignTo(Remaining, 16);
+    Remaining = 0;
+  } else {
+    Folded = MaxExtraAdj;
+    Remaining -= MaxExtraAdj;
+  }
+  SpImmOp.setImm(Folded);
+  MBBI->setFlag(isPop ? MachineInstr::FrameDestroy : MachineInstr::FrameSetup);
+  return Remaining;
+}
+
+// Returns true when Zcmp is available and at least one callee-saved register
+// in CSI has a minimal register class that is a subclass of (or equal to)
+// PGPR, i.e. Zc PUSH/POP instructions can be used with the given CSI.
+bool RISCVFrameLowering::isCSIpushable(
+    const std::vector<CalleeSavedInfo> &CSI) const {
+  if (CSI.empty() || !STI.hasStdExtZcmp())
+    return false;
+  const auto *RegInfo = STI.getRegisterInfo();
+  for (const CalleeSavedInfo &Info : CSI)
+    if (RISCV::PGPRRegClass.hasSubClassEq(
+            RegInfo->getMinimalPhysRegClass(Info.getReg())))
+      return true;
+  return false;
+}
+
 // Return true if the specified function should have a dedicated frame
 // pointer register.  This is true if frame pointer elimination is
 // disabled, if it needs dynamic stack realignment, if the function has
@@ -350,11 +441,11 @@
 // Returns the register used to hold the stack pointer.
 static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; }
 
-static SmallVector<CalleeSavedInfo, 8>
+static std::vector<CalleeSavedInfo>
 getNonLibcallCSI(const MachineFunction &MF,
                  const std::vector<CalleeSavedInfo> &CSI) {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;
+  std::vector<CalleeSavedInfo> NonLibcallCSI;
 
   for (auto &CS : CSI) {
     int FI = CS.getFrameIdx();
@@ -469,8 +560,36 @@
     RealStackSize = FirstSPAdjustAmount;
   }
 
-  // Allocate space on the stack if necessary.
-  adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
+  const auto &CSI = MFI.getCalleeSavedInfo();
+  bool PushEnabled = isCSIpushable(CSI);
+  if (PushEnabled && (CSI.size() != 0)) {
+    // Check at what offset spilling of registers starts and allocate space
+    // before it.
+    int64_t preAdjustStack = 0;
+    for (auto CS : CSI) {
+      preAdjustStack =
+          std::min(preAdjustStack, -(MFI.getObjectOffset(CS.getFrameIdx()) +
+                                     MFI.getObjectSize(CS.getFrameIdx())));
+    }
+    if (preAdjustStack != 0)
+      adjustReg(MBB, MBBI, DL, SPReg, SPReg, -preAdjustStack,
+                MachineInstr::FrameSetup);
+    StackSize -= preAdjustStack;
+
+    // Use available stack adjustment in push instruction to allocate additional
+    // stack space.
+    StackSize = adjSPInPushPop(MBBI, StackSize, false);
+    if (StackSize != 0) {
+      adjustReg(MBB, next_nodbg(MBBI, MBB.end()), DL, SPReg, SPReg, -StackSize,
+                MachineInstr::FrameSetup);
+      MBBI = next_nodbg(MBBI, MBB.end());
+      reallocPushStackFream(MF);
+    }
+  } else {
+    // Allocate space on the stack if necessary.
+    adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize,
+              MachineInstr::FrameSetup);
+  }
 
   // Emit ".cfi_def_cfa_offset RealStackSize"
   unsigned CFIIndex = MF.addFrameInst(
@@ -479,15 +598,16 @@
       .addCFIIndex(CFIIndex)
       .setMIFlag(MachineInstr::FrameSetup);
 
-  const auto &CSI = MFI.getCalleeSavedInfo();
-
-  // The frame pointer is callee-saved, and code has been generated for us to
-  // save it to the stack. We need to skip over the storing of callee-saved
-  // registers as the frame pointer must be modified after it has been saved
-  // to the stack, not before.
-  // FIXME: assumes exactly one instruction is used to save each callee-saved
-  // register.
-  std::advance(MBBI, getNonLibcallCSI(MF, CSI).size());
+  if (PushEnabled)
+    std::advance(MBBI, 1);
+  else
+    // The frame pointer is callee-saved, and code has been generated for us to
+    // save it to the stack. We need to skip over the storing of callee-saved
+    // registers as the frame pointer must be modified after it has been saved
+    // to the stack, not before.
+    // FIXME: assumes exactly one instruction is used to save each callee-saved
+    // register.
+    std::advance(MBBI, getNonLibcallCSI(MF, CSI).size());
 
   // Iterate over list of callee-saved registers and emit .cfi_offset
   // directives.
@@ -628,7 +748,10 @@
   // FIXME: assumes exactly one instruction is used to restore each
   // callee-saved register.
   auto LastFrameDestroy = MBBI;
-  if (!CSI.empty())
+  bool PopEnabled = isCSIpushable(CSI);
+  if (PopEnabled)
+    LastFrameDestroy = prev_nodbg(MBBI, MBB.begin());
+  else if (!CSI.empty())
     LastFrameDestroy = std::prev(MBBI, CSI.size());
 
   uint64_t StackSize = getStackSizeWithRVVPadding(MF);
@@ -672,7 +795,31 @@
     StackSize = FirstSPAdjustAmount;
 
   // Deallocate stack
-  adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy);
+  if (PopEnabled) {
+    // Check at what offset spilling of registers starts and calculate space
+    // before it.
+    int64_t preAdjustSize = 0;
+    for (auto CS : CSI) {
+      preAdjustSize =
+          std::min(preAdjustSize, -(MFI.getObjectOffset(CS.getFrameIdx()) +
+                                    MFI.getObjectSize(CS.getFrameIdx())));
+    }
+    adjustReg(MBB, MBBI, DL, SPReg, SPReg, preAdjustSize,
+              MachineInstr::FrameDestroy);
+    StackSize -= preAdjustSize;
+    if (preAdjustSize != 0)
+      MBBI = prev_nodbg(MBBI, MBB.begin());
+
+    // Use available stack adjustment in pop instruction to deallocate stack
+    // space.
+    StackSize = adjSPInPushPop(prev_nodbg(MBBI, MBB.begin()), StackSize, true);
+    if (StackSize != 0) {
+      adjustReg(MBB, prev_nodbg(MBBI, MBB.begin()), DL, SPReg, SPReg, StackSize,
+                MachineInstr::FrameDestroy);
+    }
+  } else
+    adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize,
+              MachineInstr::FrameDestroy);
 
   // Emit epilogue for shadow call stack.
   emitSCSEpilogue(MF, MBB, MBBI, DL);
@@ -1159,26 +1306,67 @@
   if (MI != MBB.end() && !MI->isDebugInstr())
     DL = MI->getDebugLoc();
 
-  const char *SpillLibCall = getSpillLibCallName(*MF, CSI);
-  if (SpillLibCall) {
-    // Add spill libcall via non-callee-saved register t0.
-    BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5)
-        .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL)
-        .setMIFlag(MachineInstr::FrameSetup);
+  // Emit CM.PUSH with base SPimm & evaluate Push stack
+  if (isCSIpushable(CSI.vec())) {
+    auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+    uint64_t PushStackSize = 0;
+    std::vector<CalleeSavedInfo> NonePushCSI;
+    Register MaxReg = RISCV::NoRegister;
+
+    for (auto &CS : CSI) {
+      Register Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      if (RISCV::PGPRRegClass.hasSubClassEq(RC)) {
+        if (Reg != RISCV::X27)
+          PushStackSize += 4;
+        MaxReg = std::max(MaxReg.id(), Reg.id());
+      } else if (Reg.id() == RISCV::X26) {
+        PushStackSize += 8;
+        MaxReg = RISCV::X27;
+      } else
+        NonePushCSI.push_back(CS);
+    }
+    RVFI->setRVPushStackSize(PushStackSize);
+
+    MachineInstrBuilder PushBuilder =
+        BuildMI(MBB, MI, DL, TII.get(RISCV::CM_PUSH));
+    // Use encoded number to represent registers to spill.
+    int RegEnc = getPushPopEncoding(MaxReg);
+    PushBuilder.addImm(RegEnc);
+    // Calculate SpImm Base adjustment, and SpImm field will be updated
+    // through adjSPInPushPop.
+    bool isRV64 = STI.getFeatureBits()[RISCV::Feature64Bit];
+    bool isEABI = false; // Reserved for future implementation
+    uint32_t SpImmBase = RISCVZC::getStackAdjBase(RegEnc, isRV64, isEABI);
+    PushBuilder.addImm(SpImmBase);
+
+    for (auto &CS : NonePushCSI) {
+      Register Reg = CS.getReg();
+      TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(),
+                              TRI->getMinimalPhysRegClass(Reg), TRI);
+    }
+  } else {
+    const char *SpillLibCall = getSpillLibCallName(*MF, CSI);
+    if (SpillLibCall) {
+      // Add spill libcall via non-callee-saved register t0.
+      BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5)
+          .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL)
+          .setMIFlag(MachineInstr::FrameSetup);
 
-    // Add registers spilled in libcall as liveins.
-    for (auto &CS : CSI)
-      MBB.addLiveIn(CS.getReg());
-  }
+      // Add registers spilled in libcall as liveins.
+      for (auto &CS : CSI)
+        MBB.addLiveIn(CS.getReg());
+    }
 
-  // Manually spill values not spilled by libcall.
-  const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
-  for (auto &CS : NonLibcallCSI) {
-    // Insert the spill to the stack frame.
-    Register Reg = CS.getReg();
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
-                            RC, TRI);
+    // Manually spill values not spilled by libcall.
+    const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
+    for (auto &CS : NonLibcallCSI) {
+      // Insert the spill to the stack frame.
+      Register Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg),
+                              CS.getFrameIdx(), RC, TRI);
+    }
   }
 
   return true;
@@ -1196,36 +1384,63 @@
   if (MI != MBB.end() && !MI->isDebugInstr())
     DL = MI->getDebugLoc();
 
-  // Manually restore values not restored by libcall.
-  // Keep the same order as in the prologue. There is no need to reverse the
-  // order in the epilogue. In addition, the return address will be restored
-  // first in the epilogue. It increases the opportunity to avoid the
-  // load-to-use data hazard between loading RA and return by RA.
-  // loadRegFromStackSlot can insert multiple instructions.
-  const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
-  for (auto &CS : NonLibcallCSI) {
-    Register Reg = CS.getReg();
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
-    assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
-  }
+  if (isCSIpushable(CSI.vec())) {
+    Register MaxReg = RISCV::NoRegister;
+
+    for (auto &CS : reverse(CSI)) {
+      Register Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      if (RISCV::PGPRRegClass.hasSubClassEq(RC))
+        MaxReg = std::max(MaxReg.id(), Reg.id());
+      else if (Reg.id() == RISCV::X26) {
+        MaxReg = RISCV::X27;
+      } else
+        TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
+    }
 
-  const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
-  if (RestoreLibCall) {
-    // Add restore libcall via tail call.
-    MachineBasicBlock::iterator NewMI =
-        BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL))
-            .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL)
-            .setMIFlag(MachineInstr::FrameDestroy);
-
-    // Remove trailing returns, since the terminator is now a tail call to the
-    // restore function.
-    if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
-      NewMI->copyImplicitOps(*MF, *MI);
-      MI->eraseFromParent();
+    MachineInstrBuilder PopBuilder =
+        BuildMI(MBB, MI, DL, TII.get(RISCV::CM_POP));
+    // Use encoded number to represent registers to restore.
+    int RegEnc = getPushPopEncoding(MaxReg);
+    PopBuilder.addImm(RegEnc);
+    // Calculate SpImm Base adjustment, and SpImm field will be updated
+    // through adjSPInPushPop.
+    bool isRV64 = STI.getFeatureBits()[RISCV::Feature64Bit];
+    bool isEABI = false; // Reserved for future implementation
+    uint32_t SpImmBase = RISCVZC::getStackAdjBase(RegEnc, isRV64, isEABI);
+    PopBuilder.addImm(SpImmBase);
+  } else {
+    // Manually restore values not restored by libcall.
+    // Keep the same order as in the prologue. There is no need to reverse the
+    // order in the epilogue. In addition, the return address will be restored
+    // first in the epilogue. It increases the opportunity to avoid the
+    // load-to-use data hazard between loading RA and return by RA.
+    // loadRegFromStackSlot can insert multiple instructions.
+    const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
+    for (auto &CS : NonLibcallCSI) {
+      Register Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
+      assert(MI != MBB.begin() &&
+             "loadRegFromStackSlot didn't insert any code!");
     }
-  }
 
+    const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
+    if (RestoreLibCall) {
+      // Add restore libcall via tail call.
+      MachineBasicBlock::iterator NewMI =
+          BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL))
+              .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL)
+              .setMIFlag(MachineInstr::FrameDestroy);
+
+      // Remove trailing returns, since the terminator is now a tail call to the
+      // restore function.
+      if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
+        NewMI->copyImplicitOps(*MF, *MI);
+        MI->eraseFromParent();
+      }
+    }
+  }
   return true;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -63,6 +63,8 @@
   uint64_t RVVPadding = 0;
   /// Size of stack frame to save callee saved registers
   unsigned CalleeSavedStackSize = 0;
+  /// Size of stack frame for zcmp PUSH/POP (uint64_t to match its accessors)
+  uint64_t RVPushStackSize = 0;
 
 public:
   RISCVMachineFunctionInfo(const MachineFunction &MF) {}
@@ -93,7 +95,8 @@
     // function uses a varargs save area, or is an interrupt handler.
     return MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() &&
            VarArgsSaveSize == 0 && !MF.getFrameInfo().hasTailCall() &&
-           !MF.getFunction().hasFnAttribute("interrupt");
+           !MF.getFunction().hasFnAttribute("interrupt") &&
+           !MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp();
   }
 
   uint64_t getRVVStackSize() const { return RVVStackSize; }
@@ -108,6 +111,9 @@
   unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
   void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
 
+  uint64_t getRVPushStackSize() const { return RVPushStackSize; }
+  void setRVPushStackSize(uint64_t Size) { RVPushStackSize = Size; }
+
   void initializeBaseYamlFields(const yaml::RISCVMachineFunctionInfo &YamlMFI);
 };
 
diff --git a/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp
@@ -0,0 +1,252 @@
+//===---------- RISCVMoveOptimizer.cpp - RISCV move opt. pass -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs move related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_MOVE_OPT_NAME "RISC-V Zc move merging pass"
+
+namespace {
+struct RISCVMoveOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVMoveOpt() : MachineFunctionPass(ID) {
+    initializeRISCVMoveOptPass(*PassRegistry::getPassRegistry());
+  }
+  // Set by runOnMachineFunction; null before the first run.
+  const RISCVInstrInfo *TII = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+  const RISCVSubtarget *Subtarget = nullptr;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+  // Register-constraint checks for CM.MVA01S and CM.MVSA01 respectively.
+  bool isCandidateToMergeMVA01S(DestSourcePair &RegPair);
+  bool isCandidateToMergeMVSA01(DestSourcePair &RegPair);
+  // Merge the two instructions indicated into a single pair instruction.
+  MachineBasicBlock::iterator
+  mergePairedInsns(MachineBasicBlock::iterator I,
+                   MachineBasicBlock::iterator Paired, unsigned Opcode);
+
+  // Look for C.MV instruction that can be combined with
+  // the given instruction into CM.MVA01S or CM.MVSA01. Return the matching
+  // instruction if one exists.
+  MachineBasicBlock::iterator
+  findMatchingInst(MachineBasicBlock::iterator &MBBI, unsigned InstOpcode);
+  bool MovOpt(MachineBasicBlock &MBB);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override { return RISCV_MOVE_OPT_NAME; }
+};
+
+char RISCVMoveOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVMoveOpt, "riscv-mov-opt", RISCV_MOVE_OPT_NAME, false,
+                false)
+
+// Check if registers meet CM.MVA01S constraints: the copy must write a0 or a1
+// (X10/X11) and read a register whose minimal class lies within SR07.
+bool RISCVMoveOpt::isCandidateToMergeMVA01S(DestSourcePair &RegPair) {
+  const Register Dst = RegPair.Destination->getReg();
+  // The class lookup is done up front, before the destination is examined.
+  const TargetRegisterClass *SrcRC =
+      TRI->getMinimalPhysRegClass(RegPair.Source->getReg());
+  const bool WritesA0OrA1 = Dst == RISCV::X10 || Dst == RISCV::X11;
+  // Both halves of the constraint have to hold.
+  return WritesA0OrA1 && RISCV::SR07RegClass.hasSubClassEq(SrcRC);
+}
+
+// Check if registers meet CM.MVSA01 constraints: the copy must read a0 or a1
+// (X10/X11) and write a register whose minimal class lies within SR07.
+bool RISCVMoveOpt::isCandidateToMergeMVSA01(DestSourcePair &RegPair) {
+  const Register Src = RegPair.Source->getReg();
+  // The class lookup is done up front, before the source is examined.
+  const TargetRegisterClass *DstRC =
+      TRI->getMinimalPhysRegClass(RegPair.Destination->getReg());
+  if (!RISCV::SR07RegClass.hasSubClassEq(DstRC))
+    return false;
+  // The destination qualifies; the source register decides.
+  return Src == RISCV::X10 || Src == RISCV::X11;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+                               MachineBasicBlock::iterator Paired,
+                               unsigned Opcode) {
+  // Replaces the copies at I and Paired by a single Opcode instruction placed
+  // at I and returns the iterator from which scanning should continue.
+  MachineBasicBlock &MBB = *I->getParent();
+  const MachineBasicBlock::iterator End = MBB.end();
+
+  // Both copies get erased, so the resume point has to lie beyond Paired
+  // when Paired directly follows I.
+  MachineBasicBlock::iterator Resume = next_nodbg(I, End);
+  if (Resume == Paired)
+    Resume = next_nodbg(Resume, End);
+
+  DestSourcePair First = TII->isCopyInstrImpl(*I).value();
+  DestSourcePair Second = TII->isCopyInstrImpl(*Paired).value();
+
+  // For CM_MVA01S the A register is the destination and the S register the
+  // source of each copy; for the other opcode the roles are swapped.
+  const bool IsMVA01S = Opcode == RISCV::CM_MVA01S;
+  const MachineOperand *FirstA = IsMVA01S ? First.Destination : First.Source;
+  const MachineOperand *FirstS = IsMVA01S ? First.Source : First.Destination;
+  const MachineOperand *SecondS =
+      IsMVA01S ? Second.Source : Second.Destination;
+
+  // The S register paired with a0 is emitted first, whichever copy held it:
+  //   mv a1, s1
+  //   mv a0, s2    =>  cm.mva01s s2,s1
+  //
+  //   mv a0, s2
+  //   mv a1, s1    =>  cm.mva01s s2,s1
+  const bool FirstHoldsA0 = FirstA->getReg() == RISCV::X10;
+  const MachineOperand *Sreg1 = FirstHoldsA0 ? FirstS : SecondS;
+  const MachineOperand *Sreg2 = FirstHoldsA0 ? SecondS : FirstS;
+
+  BuildMI(MBB, I, I->getDebugLoc(), TII->get(Opcode))
+      .add(*Sreg1)
+      .add(*Sreg2);
+  I->eraseFromParent();
+  Paired->eraseFromParent();
+  return Resume;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::findMatchingInst(MachineBasicBlock::iterator &MBBI,
+                               unsigned InstOpcode) {
+  // Returns the copy to merge with MBBI, or the block end if there is none.
+  MachineBasicBlock::iterator E = MBBI->getParent()->end();
+  DestSourcePair FirstPair = TII->isCopyInstrImpl(*MBBI).value();
+  // Track which register units have been modified and used between the first
+  // insn and the second insn.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  for (MachineBasicBlock::iterator I = next_nodbg(MBBI, E); I != E;
+       I = next_nodbg(I, E)) {
+    MachineInstr &MI = *I;
+
+    if (auto SecondPair = TII->isCopyInstrImpl(MI)) {
+      Register SourceReg = SecondPair->Source->getReg();
+      Register DestReg = SecondPair->Destination->getReg();
+
+      if (InstOpcode == RISCV::CM_MVA01S &&
+          isCandidateToMergeMVA01S(*SecondPair)) {
+        // Two copies writing the same A register cannot be paired.
+        if ((FirstPair.Destination->getReg() == DestReg))
+          return E;
+
+        //  If paired destination register was modified or used, there is no
+        //  possibility of finding matching instruction so exit early.
+        if (!ModifiedRegUnits.available(DestReg) ||
+            !UsedRegUnits.available(DestReg))
+          return E;
+
+        // We need to check if the source register in the second paired
+        // instruction is defined in between.
+        if (ModifiedRegUnits.available(SourceReg))
+          return I;
+
+      } else if (InstOpcode == RISCV::CM_MVSA01 &&
+                 isCandidateToMergeMVSA01(*SecondPair)) {
+        if ((FirstPair.Source->getReg() == SourceReg) ||
+            (FirstPair.Destination->getReg() == DestReg))
+          return E;
+
+        if (!ModifiedRegUnits.available(SourceReg) ||
+            !UsedRegUnits.available(SourceReg))
+          return E;
+
+        // The second copy's definition is hoisted up to the first copy, so
+        // its dest register must be neither read nor written in between; an
+        // intervening write would clobber the hoisted value.
+        if (UsedRegUnits.available(DestReg) &&
+            ModifiedRegUnits.available(DestReg))
+          return I;
+      }
+    }
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+  }
+  return E;
+}
+
+// Scans MBB for copies that qualify for CM.MVA01S or CM.MVSA01 and merges
+// each with a matching partner copy. Returns true if anything was merged.
+bool RISCVMoveOpt::MovOpt(MachineBasicBlock &MBB) {
+  bool Changed = false;
+  const MachineBasicBlock::iterator End = MBB.end();
+  MachineBasicBlock::iterator It = MBB.begin();
+  while (It != End) {
+    // Anything isCopyInstrImpl does not report as a copy is stepped over.
+    auto Copy = TII->isCopyInstrImpl(*It);
+    if (!Copy) {
+      ++It;
+      continue;
+    }
+    // Pick the merged opcode this copy could take part in, if any.
+    unsigned MergedOpc = 0;
+    if (isCandidateToMergeMVA01S(*Copy))
+      MergedOpc = RISCV::CM_MVA01S;
+    else if (isCandidateToMergeMVSA01(*Copy))
+      MergedOpc = RISCV::CM_MVSA01;
+    else {
+      ++It;
+      continue;
+    }
+    // findMatchingInst yields the block end when there is no partner.
+    MachineBasicBlock::iterator Partner = findMatchingInst(It, MergedOpc);
+    if (Partner == End) {
+      ++It;
+      continue;
+    }
+    It = mergePairedInsns(It, Partner, MergedOpc);
+    Changed = true;
+  }
+  return Changed;
+}
+
+bool RISCVMoveOpt::runOnMachineFunction(MachineFunction &Fn) {
+  // Entry point: merge moves in every block; returns true if Fn changed.
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  // Nothing to do unless the subtarget has Zcmp.
+  Subtarget = &static_cast<const RISCVSubtarget &>(Fn.getSubtarget());
+  if (!Subtarget->hasStdExtZcmp())
+    return false;
+
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Size the register unit trackers once per function; findMatchingInst
+  // clears them for each candidate move.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Changed = false;
+  for (MachineBasicBlock &Block : Fn)
+    if (MovOpt(Block))
+      Changed = true;
+  return Changed;
+}
+
+/// Factory for the move optimization pass: returns a newly allocated
+/// RISCVMoveOpt instance.
+FunctionPass *llvm::createRISCVMoveOptimizationPass() {
+  return new RISCVMoveOpt;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
@@ -0,0 +1,161 @@
+//===------- RISCVPushPopOptimizer.cpp - RISCV Push/Pop opt. pass ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that modifies PUSH/POP instructions from the Zcmp
+// extension to use their non-prologue/epilogue related functionality and
+// generates POPRET instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_PUSH_POP_OPT_NAME "RISC-V Zc Push/Pop optimization pass"
+
+namespace {
+struct RISCVPushPopOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVPushPopOpt() : MachineFunctionPass(ID) {
+    initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry());
+  }
+  // Set by runOnMachineFunction; null before the first run.
+  const RISCVInstrInfo *TII = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+  const RISCVSubtarget *Subtarget = nullptr;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+  bool usePopRet(MachineBasicBlock::iterator &MBBI);
+  bool adjustRetVal(MachineBasicBlock::iterator &MBBI);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+  // POP instructions for which adjustRetVal found a return value, and it.
+  std::map<MachineInstr *, int> retValMap;
+
+  StringRef getPassName() const override { return RISCV_PUSH_POP_OPT_NAME; }
+};
+
+char RISCVPushPopOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVPushPopOpt, "riscv-push-pop-opt", RISCV_PUSH_POP_OPT_NAME,
+                false, false)
+
+// Returns an iterator to the first CM_POP in MBB, or MBB.end() if none.
+static MachineBasicBlock::iterator containsPop(MachineBasicBlock &MBB) {
+  MachineBasicBlock::iterator It = MBB.begin();
+  const MachineBasicBlock::iterator End = MBB.end();
+  // next_nodbg steps to the following non-debug instruction.
+  while (It != End && It->getOpcode() != RISCV::CM_POP)
+    It = next_nodbg(It, End);
+  return It;
+}
+
+bool RISCVPushPopOpt::usePopRet(MachineBasicBlock::iterator &MBBI) {
+  // Replaces the CM_POP at MBBI plus a directly following PseudoRET by
+  // CM_POPRET, or CM_POPRETZ when retValMap records a zero return value.
+  MachineBasicBlock::iterator E = MBBI->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(MBBI, E);
+  // Since Pseudo instruction lowering happen later in the pipeline,
+  // this will detect all ret instruction. NextI is the block end when no
+  // instruction follows the POP, so it is checked before being dereferenced.
+  if (NextI == E || NextI->getOpcode() != RISCV::PseudoRET)
+    return false;
+  auto retValInfo = retValMap.find(&(*MBBI));
+  unsigned Opc;
+  if (retValInfo == retValMap.end())
+    Opc = RISCV::CM_POPRET;
+  else if (retValInfo->second == 0)
+    Opc = RISCV::CM_POPRETZ;
+  else
+    return false; // If the return value is not 0 then POPRETZ is not used.
+  BuildMI(*NextI->getParent(), NextI, NextI->getDebugLoc(), TII->get(Opc))
+      .add(MBBI->getOperand(0))
+      .add(MBBI->getOperand(1));
+  MBBI->eraseFromParent();
+  NextI->eraseFromParent();
+  return true;
+}
+
+// Search for last assignment to a0 and if possible use ret_val slot of POP to
+// store return value. On success the load-immediate of 0 into a0 is deleted,
+// the POP is recorded in retValMap and true is returned; otherwise nothing is
+// changed and false is returned.
+bool RISCVPushPopOpt::adjustRetVal(MachineBasicBlock::iterator &MBBI) {
+  MachineBasicBlock::reverse_iterator RE = MBBI->getParent()->rend();
+  // Track which register units have been modified and used between the POP
+  // insn and the last assignment to register a0.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+  retValMap.clear();
+  const Register A0(RISCV::X10);
+  // Since POP instruction is in Epilogue no normal instructions will follow
+  // after it. Therefore search only previous ones to find the return value.
+  for (MachineBasicBlock::reverse_iterator I =
+           next_nodbg(MBBI.getReverse(), RE);
+       I != RE; I = next_nodbg(I, RE)) {
+    MachineInstr &MI = *I;
+    if (auto OperandPair = TII->isLoadImmImpl(MI)) {
+      if (OperandPair->Destination->getReg() == A0) {
+        // Only a zero return value can be folded (usePopRet emits POPRETZ).
+        if (OperandPair->Source->getImm() != 0)
+          return false;
+        retValMap[&(*MBBI)] = 0;
+        // Delete the instruction rather than merely unlinking it; I is not
+        // used again before returning.
+        MI.eraseFromParent();
+        return true;
+      }
+    }
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+    // If a0 was modified or used, there is no possibility
+    // of using ret_val slot of POP instruction.
+    if (!ModifiedRegUnits.available(A0) || !UsedRegUnits.available(A0))
+      return false;
+  }
+  return false;
+}
+
+bool RISCVPushPopOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+  // If Zcmp extension is not supported abort.
+  Subtarget = &static_cast<const RISCVSubtarget &>(Fn.getSubtarget());
+  if (!Subtarget->hasStdExtZcmp())
+    return false;
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Size the register unit trackers once; adjustRetVal clears them per POP.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Modified = false;
+  for (auto &MBB : Fn) {
+    // adjustRetVal deletes the `a0 = 0` it finds, which is only sound if the
+    // POP then becomes POPRETZ, so first require POP + PseudoRET.
+    auto MBBI = containsPop(MBB);
+    if (MBBI == MBB.end() || !MBB.isReturnBlock())
+      continue;
+    auto NextI = next_nodbg(MBBI, MBB.end());
+    if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET)
+      continue;
+    Modified |= adjustRetVal(MBBI);
+    Modified |= usePopRet(MBBI);
+  }
+  return Modified;
+}
+
+/// Factory for the Push/Pop optimization pass: returns a newly allocated
+/// RISCVPushPopOpt instance.
+FunctionPass *llvm::createRISCVPushPopOptimizationPass() {
+  return new RISCVPushPopOpt;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -55,6 +55,8 @@
   initializeRISCVPreRAExpandPseudoPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
   initializeRISCVInsertVSETVLIPass(*PR);
+  initializeRISCVMoveOptPass(*PR);
+  initializeRISCVPushPopOptPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT) {
@@ -242,6 +244,10 @@
 }
 
 void RISCVPassConfig::addPreEmitPass2() {
+  addPass(createRISCVMoveOptimizationPass());
+  // Schedule PushPop Optimization before expansion of Pseudo instruction,
+  // ensuring return instruction is detected correctly.
+  addPass(createRISCVPushPopOptimizationPass());
   addPass(createRISCVExpandPseudoPass());
   // Schedule the expansion of AMOs at the last possible moment, avoiding the
   // possibility for other passes to break the requirements for forward
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -55,6 +55,8 @@
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
+; CHECK-NEXT:       RISC-V Zc move merging pass 
+; CHECK-NEXT:       RISC-V Zc Push/Pop optimization pass 
 ; CHECK-NEXT:       RISCV pseudo instruction expansion pass
 ; CHECK-NEXT:       RISCV atomic pseudo instruction expansion pass
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -160,6 +160,8 @@
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 ; CHECK-NEXT:     Machine Outliner
 ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       RISC-V Zc move merging pass 
+; CHECK-NEXT:       RISC-V Zc Push/Pop optimization pass 
 ; CHECK-NEXT:       RISCV pseudo instruction expansion pass
 ; CHECK-NEXT:       RISCV atomic pseudo instruction expansion pass
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -42,6 +42,8 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zicbom %s -o - | FileCheck --check-prefix=RV32ZICBOM %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zicboz %s -o - | FileCheck --check-prefix=RV32ZICBOZ %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zicbop %s -o - | FileCheck --check-prefix=RV32ZICBOP %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp %s -o - | FileCheck --check-prefix=RV32ZCMP %s
+
 ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zmmul %s -o - | FileCheck --check-prefix=RV64ZMMUL %s
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+zmmul %s -o - | FileCheck --check-prefix=RV64MZMMUL %s
@@ -84,6 +86,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zicbom %s -o - | FileCheck --check-prefix=RV64ZICBOM %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zicboz %s -o - | FileCheck --check-prefix=RV64ZICBOZ %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zicbop %s -o - | FileCheck --check-prefix=RV64ZICBOP %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp %s -o - | FileCheck --check-prefix=RV64ZCMP %s
 
 ; RV32M: .attribute 5, "rv32i2p0_m2p0"
 ; RV32ZMMUL: .attribute 5, "rv32i2p0_zmmul1p0"
@@ -127,6 +130,7 @@
 ; RV32ZICBOM: .attribute 5, "rv32i2p0_zicbom1p0"
 ; RV32ZICBOZ: .attribute 5, "rv32i2p0_zicboz1p0"
 ; RV32ZICBOP: .attribute 5, "rv32i2p0_zicbop1p0"
+; RV32ZCMP: .attribute 5, "rv32i2p0_zca0p70_zcmp0p70"
 
 ; RV64M: .attribute 5, "rv64i2p0_m2p0"
 ; RV64ZMMUL: .attribute 5, "rv64i2p0_zmmul1p0"
@@ -170,6 +174,7 @@
 ; RV64ZICBOM: .attribute 5, "rv64i2p0_zicbom1p0"
 ; RV64ZICBOZ: .attribute 5, "rv64i2p0_zicboz1p0"
 ; RV64ZICBOP: .attribute 5, "rv64i2p0_zicbop1p0"
+; RV64ZCMP: .attribute 5, "rv64i2p0_zca0p70_zcmp0p70"
 
 define i32 @addi(i32 %a) {
   %1 = add i32 %a, 1
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -9,6 +9,8 @@
 ; RUN:   | FileCheck %s -check-prefix=RV32I
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -frame-pointer=all < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV32I-WITH-FP
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32IZCMP
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64I
 ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \
@@ -19,6 +21,8 @@
 ; RUN:   | FileCheck %s -check-prefix=RV64I
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -frame-pointer=all < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64I-WITH-FP
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV64IZCMP
 
 @var = global [32 x i32] zeroinitializer
 
@@ -249,6 +253,89 @@
 ; RV32I-WITH-FP-NEXT:    addi sp, sp, 80
 ; RV32I-WITH-FP-NEXT:    ret
 ;
+; RV32IZCMP-LABEL: callee:
+; RV32IZCMP:       # %bb.0:
+; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -80
+; RV32IZCMP-NEXT:    lui a7, %hi(var)
+; RV32IZCMP-NEXT:    lw a0, %lo(var)(a7)
+; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var+4)(a7)
+; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var+8)(a7)
+; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var+12)(a7)
+; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    addi a5, a7, %lo(var)
+; RV32IZCMP-NEXT:    lw a0, 16(a5)
+; RV32IZCMP-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 20(a5)
+; RV32IZCMP-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw t0, 24(a5)
+; RV32IZCMP-NEXT:    lw t1, 28(a5)
+; RV32IZCMP-NEXT:    lw t2, 32(a5)
+; RV32IZCMP-NEXT:    lw t3, 36(a5)
+; RV32IZCMP-NEXT:    lw t4, 40(a5)
+; RV32IZCMP-NEXT:    lw t5, 44(a5)
+; RV32IZCMP-NEXT:    lw t6, 48(a5)
+; RV32IZCMP-NEXT:    lw s0, 52(a5)
+; RV32IZCMP-NEXT:    lw s1, 56(a5)
+; RV32IZCMP-NEXT:    lw s2, 60(a5)
+; RV32IZCMP-NEXT:    lw s3, 64(a5)
+; RV32IZCMP-NEXT:    lw s4, 68(a5)
+; RV32IZCMP-NEXT:    lw s5, 72(a5)
+; RV32IZCMP-NEXT:    lw s6, 76(a5)
+; RV32IZCMP-NEXT:    lw s7, 80(a5)
+; RV32IZCMP-NEXT:    lw s8, 84(a5)
+; RV32IZCMP-NEXT:    lw s9, 88(a5)
+; RV32IZCMP-NEXT:    lw s10, 92(a5)
+; RV32IZCMP-NEXT:    lw s11, 96(a5)
+; RV32IZCMP-NEXT:    lw ra, 100(a5)
+; RV32IZCMP-NEXT:    lw a6, 104(a5)
+; RV32IZCMP-NEXT:    lw a4, 108(a5)
+; RV32IZCMP-NEXT:    lw a0, 124(a5)
+; RV32IZCMP-NEXT:    lw a1, 120(a5)
+; RV32IZCMP-NEXT:    lw a2, 116(a5)
+; RV32IZCMP-NEXT:    lw a3, 112(a5)
+; RV32IZCMP-NEXT:    sw a0, 124(a5)
+; RV32IZCMP-NEXT:    sw a1, 120(a5)
+; RV32IZCMP-NEXT:    sw a2, 116(a5)
+; RV32IZCMP-NEXT:    sw a3, 112(a5)
+; RV32IZCMP-NEXT:    sw a4, 108(a5)
+; RV32IZCMP-NEXT:    sw a6, 104(a5)
+; RV32IZCMP-NEXT:    sw ra, 100(a5)
+; RV32IZCMP-NEXT:    sw s11, 96(a5)
+; RV32IZCMP-NEXT:    sw s10, 92(a5)
+; RV32IZCMP-NEXT:    sw s9, 88(a5)
+; RV32IZCMP-NEXT:    sw s8, 84(a5)
+; RV32IZCMP-NEXT:    sw s7, 80(a5)
+; RV32IZCMP-NEXT:    sw s6, 76(a5)
+; RV32IZCMP-NEXT:    sw s5, 72(a5)
+; RV32IZCMP-NEXT:    sw s4, 68(a5)
+; RV32IZCMP-NEXT:    sw s3, 64(a5)
+; RV32IZCMP-NEXT:    sw s2, 60(a5)
+; RV32IZCMP-NEXT:    sw s1, 56(a5)
+; RV32IZCMP-NEXT:    sw s0, 52(a5)
+; RV32IZCMP-NEXT:    sw t6, 48(a5)
+; RV32IZCMP-NEXT:    sw t5, 44(a5)
+; RV32IZCMP-NEXT:    sw t4, 40(a5)
+; RV32IZCMP-NEXT:    sw t3, 36(a5)
+; RV32IZCMP-NEXT:    sw t2, 32(a5)
+; RV32IZCMP-NEXT:    sw t1, 28(a5)
+; RV32IZCMP-NEXT:    sw t0, 24(a5)
+; RV32IZCMP-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 20(a5)
+; RV32IZCMP-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 16(a5)
+; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var+12)(a7)
+; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var+8)(a7)
+; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var+4)(a7)
+; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var)(a7)
+; RV32IZCMP-NEXT:    cm.popret {ra, s0-s11}, 80
+;
 ; RV64I-LABEL: callee:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    addi sp, sp, -160
@@ -471,6 +558,89 @@
 ; RV64I-WITH-FP-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    addi sp, sp, 160
 ; RV64I-WITH-FP-NEXT:    ret
+;
+; RV64IZCMP-LABEL: callee:
+; RV64IZCMP:       # %bb.0:
+; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
+; RV64IZCMP-NEXT:    lui a7, %hi(var)
+; RV64IZCMP-NEXT:    lw a0, %lo(var)(a7)
+; RV64IZCMP-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var+4)(a7)
+; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var+8)(a7)
+; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var+12)(a7)
+; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    addi a5, a7, %lo(var)
+; RV64IZCMP-NEXT:    lw a0, 16(a5)
+; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 20(a5)
+; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw t0, 24(a5)
+; RV64IZCMP-NEXT:    lw t1, 28(a5)
+; RV64IZCMP-NEXT:    lw t2, 32(a5)
+; RV64IZCMP-NEXT:    lw t3, 36(a5)
+; RV64IZCMP-NEXT:    lw t4, 40(a5)
+; RV64IZCMP-NEXT:    lw t5, 44(a5)
+; RV64IZCMP-NEXT:    lw t6, 48(a5)
+; RV64IZCMP-NEXT:    lw s0, 52(a5)
+; RV64IZCMP-NEXT:    lw s1, 56(a5)
+; RV64IZCMP-NEXT:    lw s2, 60(a5)
+; RV64IZCMP-NEXT:    lw s3, 64(a5)
+; RV64IZCMP-NEXT:    lw s4, 68(a5)
+; RV64IZCMP-NEXT:    lw s5, 72(a5)
+; RV64IZCMP-NEXT:    lw s6, 76(a5)
+; RV64IZCMP-NEXT:    lw s7, 80(a5)
+; RV64IZCMP-NEXT:    lw s8, 84(a5)
+; RV64IZCMP-NEXT:    lw s9, 88(a5)
+; RV64IZCMP-NEXT:    lw s10, 92(a5)
+; RV64IZCMP-NEXT:    lw s11, 96(a5)
+; RV64IZCMP-NEXT:    lw ra, 100(a5)
+; RV64IZCMP-NEXT:    lw a6, 104(a5)
+; RV64IZCMP-NEXT:    lw a4, 108(a5)
+; RV64IZCMP-NEXT:    lw a0, 124(a5)
+; RV64IZCMP-NEXT:    lw a1, 120(a5)
+; RV64IZCMP-NEXT:    lw a2, 116(a5)
+; RV64IZCMP-NEXT:    lw a3, 112(a5)
+; RV64IZCMP-NEXT:    sw a0, 124(a5)
+; RV64IZCMP-NEXT:    sw a1, 120(a5)
+; RV64IZCMP-NEXT:    sw a2, 116(a5)
+; RV64IZCMP-NEXT:    sw a3, 112(a5)
+; RV64IZCMP-NEXT:    sw a4, 108(a5)
+; RV64IZCMP-NEXT:    sw a6, 104(a5)
+; RV64IZCMP-NEXT:    sw ra, 100(a5)
+; RV64IZCMP-NEXT:    sw s11, 96(a5)
+; RV64IZCMP-NEXT:    sw s10, 92(a5)
+; RV64IZCMP-NEXT:    sw s9, 88(a5)
+; RV64IZCMP-NEXT:    sw s8, 84(a5)
+; RV64IZCMP-NEXT:    sw s7, 80(a5)
+; RV64IZCMP-NEXT:    sw s6, 76(a5)
+; RV64IZCMP-NEXT:    sw s5, 72(a5)
+; RV64IZCMP-NEXT:    sw s4, 68(a5)
+; RV64IZCMP-NEXT:    sw s3, 64(a5)
+; RV64IZCMP-NEXT:    sw s2, 60(a5)
+; RV64IZCMP-NEXT:    sw s1, 56(a5)
+; RV64IZCMP-NEXT:    sw s0, 52(a5)
+; RV64IZCMP-NEXT:    sw t6, 48(a5)
+; RV64IZCMP-NEXT:    sw t5, 44(a5)
+; RV64IZCMP-NEXT:    sw t4, 40(a5)
+; RV64IZCMP-NEXT:    sw t3, 36(a5)
+; RV64IZCMP-NEXT:    sw t2, 32(a5)
+; RV64IZCMP-NEXT:    sw t1, 28(a5)
+; RV64IZCMP-NEXT:    sw t0, 24(a5)
+; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 20(a5)
+; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 16(a5)
+; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var+12)(a7)
+; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var+8)(a7)
+; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var+4)(a7)
+; RV64IZCMP-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var)(a7)
+; RV64IZCMP-NEXT:    cm.popret {ra, s0-s11}, 160
   %val = load [32 x i32], [32 x i32]* @var
   store volatile [32 x i32] %val, [32 x i32]* @var
   ret void
@@ -769,6 +939,124 @@
 ; RV32I-WITH-FP-NEXT:    addi sp, sp, 144
 ; RV32I-WITH-FP-NEXT:    ret
 ;
+; RV32IZCMP-LABEL: caller:
+; RV32IZCMP:       # %bb.0:
+; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -112
+; RV32IZCMP-NEXT:    addi sp, sp, -32
+; RV32IZCMP-NEXT:    lui s0, %hi(var)
+; RV32IZCMP-NEXT:    lw a0, %lo(var)(s0)
+; RV32IZCMP-NEXT:    sw a0, 88(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var+4)(s0)
+; RV32IZCMP-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var+8)(s0)
+; RV32IZCMP-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var+12)(s0)
+; RV32IZCMP-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    addi s5, s0, %lo(var)
+; RV32IZCMP-NEXT:    lw a0, 16(s5)
+; RV32IZCMP-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 20(s5)
+; RV32IZCMP-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 24(s5)
+; RV32IZCMP-NEXT:    sw a0, 64(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 28(s5)
+; RV32IZCMP-NEXT:    sw a0, 60(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 32(s5)
+; RV32IZCMP-NEXT:    sw a0, 56(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 36(s5)
+; RV32IZCMP-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 40(s5)
+; RV32IZCMP-NEXT:    sw a0, 48(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 44(s5)
+; RV32IZCMP-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 48(s5)
+; RV32IZCMP-NEXT:    sw a0, 40(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 52(s5)
+; RV32IZCMP-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 56(s5)
+; RV32IZCMP-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 60(s5)
+; RV32IZCMP-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 64(s5)
+; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 68(s5)
+; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 72(s5)
+; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 76(s5)
+; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 80(s5)
+; RV32IZCMP-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 84(s5)
+; RV32IZCMP-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw s3, 88(s5)
+; RV32IZCMP-NEXT:    lw s4, 92(s5)
+; RV32IZCMP-NEXT:    lw s6, 96(s5)
+; RV32IZCMP-NEXT:    lw s7, 100(s5)
+; RV32IZCMP-NEXT:    lw s8, 104(s5)
+; RV32IZCMP-NEXT:    lw s9, 108(s5)
+; RV32IZCMP-NEXT:    lw s10, 112(s5)
+; RV32IZCMP-NEXT:    lw s11, 116(s5)
+; RV32IZCMP-NEXT:    lw s1, 120(s5)
+; RV32IZCMP-NEXT:    lw s2, 124(s5)
+; RV32IZCMP-NEXT:    call callee@plt
+; RV32IZCMP-NEXT:    sw s2, 124(s5)
+; RV32IZCMP-NEXT:    sw s1, 120(s5)
+; RV32IZCMP-NEXT:    sw s11, 116(s5)
+; RV32IZCMP-NEXT:    sw s10, 112(s5)
+; RV32IZCMP-NEXT:    sw s9, 108(s5)
+; RV32IZCMP-NEXT:    sw s8, 104(s5)
+; RV32IZCMP-NEXT:    sw s7, 100(s5)
+; RV32IZCMP-NEXT:    sw s6, 96(s5)
+; RV32IZCMP-NEXT:    sw s4, 92(s5)
+; RV32IZCMP-NEXT:    sw s3, 88(s5)
+; RV32IZCMP-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 84(s5)
+; RV32IZCMP-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 80(s5)
+; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 76(s5)
+; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 72(s5)
+; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 68(s5)
+; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 64(s5)
+; RV32IZCMP-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 60(s5)
+; RV32IZCMP-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 56(s5)
+; RV32IZCMP-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 52(s5)
+; RV32IZCMP-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 48(s5)
+; RV32IZCMP-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 44(s5)
+; RV32IZCMP-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 40(s5)
+; RV32IZCMP-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 36(s5)
+; RV32IZCMP-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 32(s5)
+; RV32IZCMP-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 28(s5)
+; RV32IZCMP-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 24(s5)
+; RV32IZCMP-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 20(s5)
+; RV32IZCMP-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 16(s5)
+; RV32IZCMP-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var+12)(s0)
+; RV32IZCMP-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var+8)(s0)
+; RV32IZCMP-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var+4)(s0)
+; RV32IZCMP-NEXT:    lw a0, 88(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var)(s0)
+; RV32IZCMP-NEXT:    addi sp, sp, 32
+; RV32IZCMP-NEXT:    cm.popret {ra, s0-s11}, 112
+;
 ; RV64I-LABEL: caller:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    addi sp, sp, -288
@@ -1057,7 +1345,124 @@
 ; RV64I-WITH-FP-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    addi sp, sp, 288
 ; RV64I-WITH-FP-NEXT:    ret
-
+;
+; RV64IZCMP-LABEL: caller:
+; RV64IZCMP:       # %bb.0:
+; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
+; RV64IZCMP-NEXT:    addi sp, sp, -128
+; RV64IZCMP-NEXT:    lui s0, %hi(var)
+; RV64IZCMP-NEXT:    lw a0, %lo(var)(s0)
+; RV64IZCMP-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var+4)(s0)
+; RV64IZCMP-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var+8)(s0)
+; RV64IZCMP-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var+12)(s0)
+; RV64IZCMP-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    addi s5, s0, %lo(var)
+; RV64IZCMP-NEXT:    lw a0, 16(s5)
+; RV64IZCMP-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 20(s5)
+; RV64IZCMP-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 24(s5)
+; RV64IZCMP-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 28(s5)
+; RV64IZCMP-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 32(s5)
+; RV64IZCMP-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 36(s5)
+; RV64IZCMP-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 40(s5)
+; RV64IZCMP-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 44(s5)
+; RV64IZCMP-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 48(s5)
+; RV64IZCMP-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 52(s5)
+; RV64IZCMP-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 56(s5)
+; RV64IZCMP-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 60(s5)
+; RV64IZCMP-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 64(s5)
+; RV64IZCMP-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 68(s5)
+; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 72(s5)
+; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 76(s5)
+; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 80(s5)
+; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 84(s5)
+; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw s3, 88(s5)
+; RV64IZCMP-NEXT:    lw s4, 92(s5)
+; RV64IZCMP-NEXT:    lw s6, 96(s5)
+; RV64IZCMP-NEXT:    lw s7, 100(s5)
+; RV64IZCMP-NEXT:    lw s8, 104(s5)
+; RV64IZCMP-NEXT:    lw s9, 108(s5)
+; RV64IZCMP-NEXT:    lw s10, 112(s5)
+; RV64IZCMP-NEXT:    lw s11, 116(s5)
+; RV64IZCMP-NEXT:    lw s1, 120(s5)
+; RV64IZCMP-NEXT:    lw s2, 124(s5)
+; RV64IZCMP-NEXT:    call callee@plt
+; RV64IZCMP-NEXT:    sw s2, 124(s5)
+; RV64IZCMP-NEXT:    sw s1, 120(s5)
+; RV64IZCMP-NEXT:    sw s11, 116(s5)
+; RV64IZCMP-NEXT:    sw s10, 112(s5)
+; RV64IZCMP-NEXT:    sw s9, 108(s5)
+; RV64IZCMP-NEXT:    sw s8, 104(s5)
+; RV64IZCMP-NEXT:    sw s7, 100(s5)
+; RV64IZCMP-NEXT:    sw s6, 96(s5)
+; RV64IZCMP-NEXT:    sw s4, 92(s5)
+; RV64IZCMP-NEXT:    sw s3, 88(s5)
+; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 84(s5)
+; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 80(s5)
+; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 76(s5)
+; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 72(s5)
+; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 68(s5)
+; RV64IZCMP-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 64(s5)
+; RV64IZCMP-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 60(s5)
+; RV64IZCMP-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 56(s5)
+; RV64IZCMP-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 52(s5)
+; RV64IZCMP-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 48(s5)
+; RV64IZCMP-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 44(s5)
+; RV64IZCMP-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 40(s5)
+; RV64IZCMP-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 36(s5)
+; RV64IZCMP-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 32(s5)
+; RV64IZCMP-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 28(s5)
+; RV64IZCMP-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 24(s5)
+; RV64IZCMP-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 20(s5)
+; RV64IZCMP-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 16(s5)
+; RV64IZCMP-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var+12)(s0)
+; RV64IZCMP-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var+8)(s0)
+; RV64IZCMP-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var+4)(s0)
+; RV64IZCMP-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var)(s0)
+; RV64IZCMP-NEXT:    addi sp, sp, 128
+; RV64IZCMP-NEXT:    cm.popret {ra, s0-s11}, 160
   %val = load [32 x i32], [32 x i32]* @var
   call void @callee()
   store volatile [32 x i32] %val, [32 x i32]* @var
diff --git a/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll
@@ -0,0 +1,163 @@
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=VALID,VALID32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=VALID
+
+
+; Function Attrs: nounwind
+define dso_local i32 @cmva(i32 %num, i32 %f, i32 %d, i32 %dx) local_unnamed_addr #0 {
+;  VALID-LABEL: cmva:
+;  VALID: cm.mva01s {{s[0-7]}}, {{s[0-7]}}
+;  VALID-NOT: cm.mva01s {{a.}}, {{a.}}
+entry:
+  %mul = mul nsw i32 %dx, %d
+  %sub = sub nsw i32 %mul, %dx
+  %add = add nsw i32 %mul, %d
+  %mul2 = mul nsw i32 %sub, %dx
+  %add3 = add nsw i32 %add, %mul2
+  %mul4 = mul nsw i32 %add3, %d
+  %add6 = add nsw i32 %add3, %num
+  %add5 = add i32 %sub, %f
+  %add7 = add i32 %add5, %mul4
+  ret i32 %add7
+}
+
+declare i64 @llvm.cttz.i64(i64, i1 immarg)
+
+define i64 @cmvs32(i64 %a) nounwind {
+;  VALID32-LABEL: cmvs32:
+;  VALID32: cm.mvsa01 {{s[0-7]}}, {{s[0-7]}}
+;  VALID32-NOT: cm.mvsa01 {{a.}}, {{a.}}
+  %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+  ret i64 %tmp
+}
+
+declare void @hoge()
+define void @cmvs64(i32 signext %arg, i32 signext %arg1) nounwind {
+;  VALID64-LABEL: cmvs64:
+;  VALID64: cm.mvsa01 {{s[0-7]}}, {{s[0-7]}}
+;  VALID64-NOT: cm.mvsa01 {{a.}}, {{a.}}
+bb:
+  %tmp = icmp eq i32 %arg, %arg1
+  br i1 %tmp, label %bb6, label %bb2
+
+bb2:                                              ; preds = %bb2, %bb
+  %tmp3 = phi i32 [ %tmp4, %bb2 ], [ %arg, %bb ]
+  tail call void @hoge()
+  %tmp4 = add nsw i32 %tmp3, 1
+  %tmp5 = icmp eq i32 %tmp4, %arg1
+  br i1 %tmp5, label %bb6, label %bb2
+
+bb6:                                              ; preds = %bb2, %bb
+  ret void
+}
+
+%struct.trie = type { [26 x %struct.trie*], i8 }
+
+@word = external global i8*
+
+declare i32 @trie_new(%struct.trie*)
+declare i32 @trie_search(i8*, i32, %struct.trie**)
+declare i64 @strnlen(i8*, i64)
+
+; Function Attrs: nounwind optnone
+define i32 @mvas_2() {
+  ;  VALID64-LABEL: mvas_2:
+  ;  VALID64-NOT: cm.mva01s {{a.}}, {{s.}}
+  ;  VALID64-NOT: cm.mva01s {{s.}}, {{a.}}
+entry:
+  %trie = alloca %struct.trie*
+  %0 = bitcast %struct.trie** %trie to i8*
+  store %struct.trie* null, %struct.trie** %trie
+  %call = tail call i32 @trie_new(%struct.trie* null)
+  %1 = load i8*, i8** @word
+  %call1 = tail call i64 @strnlen(i8* %1, i64 100)
+  %conv = trunc i64 %call1 to i32
+  %call2 = call i32 @trie_search(i8* %1, i32 %conv, %struct.trie** %trie)
+  ret i32 %call2
+}
+
+declare i32 @foo(i32, i32)
+
+; Function Attrs: nounwind optnone
+define dso_local i32 @cm_mvas_same_src(i32 %0, i32 %1, i32 %2, i32 %3) {
+ ;  VALID32-LABEL: cm_mvas_same_src:
+ ;  VALID32: cm.mva01s	s0, s0
+ ;
+ ;  VALID64-LABEL: cm_mvas_same_src:
+ ;  VALID64: cm.mva01s	s0, s0
+entry:
+  %4 = call i32 @foo(i32 %3, i32 %2)
+  %5 = add i32 %4, %2
+  %6 = call i32 @foo(i32 %3, i32 %3)
+  %add = add i32 %5, %6
+  ret i32 %add
+}
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+%struct.Node = type { i8*, i64, %struct.Node*, %struct.Node* }
+
+declare i8* @malloc(i64)
+
+declare i32 @fgetc(%struct._IO_FILE*nocapture)
+
+declare %struct.Node* @addWordToTree(i8*, %struct.Node*)
+
+; Function Attrs: nounwind optsize
+define %struct.Node* @cmmv_a1_come_first(%struct._IO_FILE*nocapture %file, %struct.Node* %root) {
+ ;  VALID32-LABEL: cmmv_a1_come_first:
+ ;  VALID32: cm.push	{ra, s0-s5}, -32
+ ;  VALID32-NEXT: .cfi_offset ra, -4
+ ;  VALID32-NEXT: .cfi_offset s0, -8
+ ;  VALID32-NEXT: .cfi_offset s1, -12
+ ;  VALID32-NEXT: .cfi_offset s2, -16
+ ;  VALID32-NEXT: .cfi_offset s3, -20
+ ;  VALID32-NEXT: .cfi_offset s4, -24
+ ;  VALID32-NEXT: .cfi_offset s5, -28
+ ;  VALID32-NEXT: cm.mvsa01	s1, s0
+
+ ;  VALID64-LABEL: cmmv_a1_come_first:
+ ;  VALID64: cm.push	{ra, s0-s5}, -64
+ ;  VALID64-NEXT: .cfi_offset ra, -8
+ ;  VALID64-NEXT: .cfi_offset s0, -16
+ ;  VALID64-NEXT: .cfi_offset s1, -24
+ ;  VALID64-NEXT: .cfi_offset s2, -32
+ ;  VALID64-NEXT: .cfi_offset s3, -40
+ ;  VALID64-NEXT: .cfi_offset s4, -48
+ ;  VALID64-NEXT: .cfi_offset s5, -56
+ ;  VALID64-NEXT: cm.mvsa01	s2, s0
+entry:
+  %call = tail call dereferenceable_or_null(46) i8* @malloc(i64 46)
+  %arrayidx = getelementptr inbounds i8, i8* %call, i64 -1
+  %call117 = tail call i32 @fgetc(%struct._IO_FILE* %file)
+  %sext.mask18 = and i32 %call117, 255
+  %cmp.not19 = icmp eq i32 %sext.mask18, 255
+  br i1 %cmp.not19, label %while.end, label %land.lhs.true.preheader
+
+land.lhs.true.preheader:
+  %arrayidx921 = getelementptr inbounds i8, i8* %call, i64 255
+  store i8 0, i8* %arrayidx921
+  %call1022 = tail call %struct.Node* @addWordToTree(i8* %call, %struct.Node* %root)
+  %call123 = tail call i32 @fgetc(%struct._IO_FILE* %file)
+  %sext.mask24 = and i32 %call123, 255
+  %cmp.not25 = icmp eq i32 %sext.mask24, 255
+  br i1 %cmp.not25, label %while.end, label %land.lhs.true.land.lhs.true_crit_edge
+
+land.lhs.true.land.lhs.true_crit_edge:
+  %call1026 = phi %struct.Node* [ %call10, %land.lhs.true.land.lhs.true_crit_edge ], [ %call1022, %land.lhs.true.preheader ]
+  %.pre = load i8, i8* %arrayidx
+  %cmp6.not = icmp eq i8 %.pre, 39
+  %spec.select = select i1 %cmp6.not, i64 0, i64 255
+  %arrayidx9 = getelementptr inbounds i8, i8* %call, i64 %spec.select
+  store i8 0, i8* %arrayidx9
+  %call10 = tail call %struct.Node* @addWordToTree(i8* %call, %struct.Node* %call1026)
+  %call1 = tail call i32 @fgetc(%struct._IO_FILE* %file)
+  %sext.mask = and i32 %call1, 255
+  %cmp.not = icmp eq i32 %sext.mask, 255
+  br i1 %cmp.not, label %while.end, label %land.lhs.true.land.lhs.true_crit_edge
+
+while.end:
+  %root.addr.0.lcssa = phi %struct.Node* [ %root, %entry ], [ %call1022, %land.lhs.true.preheader ], [ %call10, %land.lhs.true.land.lhs.true_crit_edge ]
+  ret %struct.Node* %root.addr.0.lcssa
+}
diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
@@ -0,0 +1,1815 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
+; NOTE: Check cm.push/cm.pop/cm.popret/cm.popretz codegen with Zcmp enabled, next to the base-ISA prologue/epilogue.
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32IZCMP
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV64IZCMP
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32I %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64I %s
+
+declare void @test(i8*)
+
+; @foo: 512-byte alloca passed to @test, returns 0; Zcmp runs expect cm.push/cm.popretz plus a separate addi for the rest of the frame.
+define i32 @foo() {
+; RV32IZCMP-LABEL: foo:
+; RV32IZCMP:       # %bb.0:
+; RV32IZCMP-NEXT:    cm.push {ra}, -64
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 528
+; RV32IZCMP-NEXT:    addi sp, sp, -464
+; RV32IZCMP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-NEXT:    addi a0, sp, 12
+; RV32IZCMP-NEXT:    call test@plt
+; RV32IZCMP-NEXT:    addi sp, sp, 464
+; RV32IZCMP-NEXT:    cm.popretz {ra}, 64
+;
+; RV64IZCMP-LABEL: foo:
+; RV64IZCMP:       # %bb.0:
+; RV64IZCMP-NEXT:    cm.push {ra}, -64
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 528
+; RV64IZCMP-NEXT:    addi sp, sp, -464
+; RV64IZCMP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-NEXT:    addi a0, sp, 8
+; RV64IZCMP-NEXT:    call test@plt
+; RV64IZCMP-NEXT:    addi sp, sp, 464
+; RV64IZCMP-NEXT:    cm.popretz {ra}, 64
+;
+; RV32I-LABEL: foo:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -528
+; RV32I-NEXT:    .cfi_def_cfa_offset 528
+; RV32I-NEXT:    sw ra, 524(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    addi a0, sp, 12
+; RV32I-NEXT:    call test@plt
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    lw ra, 524(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 528
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: foo:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -528
+; RV64I-NEXT:    .cfi_def_cfa_offset 528
+; RV64I-NEXT:    sd ra, 520(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    addi a0, sp, 8
+; RV64I-NEXT:    call test@plt
+; RV64I-NEXT:    li a0, 0
+; RV64I-NEXT:    ld ra, 520(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 528
+; RV64I-NEXT:    ret
+  %1 = alloca [512 x i8]
+  %2 = getelementptr [512 x i8], [512 x i8]* %1, i32 0, i32 0
+  call void @test(i8* %2)
+  ret i32 0
+}
+; Variable-sized alloca passed to @callee_void, returns 0; Zcmp runs expect cm.popretz, base-ISA runs expect li a0, 0 before ret.
+define dso_local i32 @pushpopret0(i32 signext %size) local_unnamed_addr #0 {
+; RV32IZCMP-LABEL: pushpopret0:
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV32IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV32IZCMP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-NEXT:    .cfi_offset s0, -8
+; RV32IZCMP-NEXT:    addi s0, sp, 16
+; RV32IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV32IZCMP-NEXT:    addi a0, a0, 15
+; RV32IZCMP-NEXT:    andi a0, a0, -16
+; RV32IZCMP-NEXT:    sub a0, sp, a0
+; RV32IZCMP-NEXT:    mv sp, a0
+; RV32IZCMP-NEXT:    call callee_void@plt
+; RV32IZCMP-NEXT:    addi sp, s0, -16
+; RV32IZCMP-NEXT:    cm.popretz {ra, s0}, 16
+;
+; RV64IZCMP-LABEL: pushpopret0:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV64IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV64IZCMP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-NEXT:    .cfi_offset s0, -16
+; RV64IZCMP-NEXT:    addi s0, sp, 16
+; RV64IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV64IZCMP-NEXT:    slli a0, a0, 32
+; RV64IZCMP-NEXT:    srli a0, a0, 32
+; RV64IZCMP-NEXT:    addi a0, a0, 15
+; RV64IZCMP-NEXT:    andi a0, a0, -16
+; RV64IZCMP-NEXT:    sub a0, sp, a0
+; RV64IZCMP-NEXT:    mv sp, a0
+; RV64IZCMP-NEXT:    call callee_void@plt
+; RV64IZCMP-NEXT:    addi sp, s0, -16
+; RV64IZCMP-NEXT:    cm.popretz {ra, s0}, 16
+;
+; RV32I-LABEL: pushpopret0:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    .cfi_offset s0, -8
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    .cfi_def_cfa s0, 0
+; RV32I-NEXT:    addi a0, a0, 15
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    mv sp, a0
+; RV32I-NEXT:    call callee_void@plt
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    addi sp, s0, -16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: pushpopret0:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    .cfi_offset s0, -16
+; RV64I-NEXT:    addi s0, sp, 16
+; RV64I-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    andi a0, a0, -16
+; RV64I-NEXT:    sub a0, sp, a0
+; RV64I-NEXT:    mv sp, a0
+; RV64I-NEXT:    call callee_void@plt
+; RV64I-NEXT:    li a0, 0
+; RV64I-NEXT:    addi sp, s0, -16
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+entry:
+  %0 = alloca i8, i32 %size, align 16
+  call void @callee_void(i8* nonnull %0)
+  ret i32 0
+}
+; Variable-sized alloca passed to @callee_void, returns 1; Zcmp runs expect li a0, 1 followed by cm.popret.
+define dso_local i32 @pushpopret1(i32 signext %size) local_unnamed_addr #0 {
+; RV32IZCMP-LABEL: pushpopret1:
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV32IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV32IZCMP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-NEXT:    .cfi_offset s0, -8
+; RV32IZCMP-NEXT:    addi s0, sp, 16
+; RV32IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV32IZCMP-NEXT:    addi a0, a0, 15
+; RV32IZCMP-NEXT:    andi a0, a0, -16
+; RV32IZCMP-NEXT:    sub a0, sp, a0
+; RV32IZCMP-NEXT:    mv sp, a0
+; RV32IZCMP-NEXT:    call callee_void@plt
+; RV32IZCMP-NEXT:    li a0, 1
+; RV32IZCMP-NEXT:    addi sp, s0, -16
+; RV32IZCMP-NEXT:    cm.popret {ra, s0}, 16
+;
+; RV64IZCMP-LABEL: pushpopret1:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV64IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV64IZCMP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-NEXT:    .cfi_offset s0, -16
+; RV64IZCMP-NEXT:    addi s0, sp, 16
+; RV64IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV64IZCMP-NEXT:    slli a0, a0, 32
+; RV64IZCMP-NEXT:    srli a0, a0, 32
+; RV64IZCMP-NEXT:    addi a0, a0, 15
+; RV64IZCMP-NEXT:    andi a0, a0, -16
+; RV64IZCMP-NEXT:    sub a0, sp, a0
+; RV64IZCMP-NEXT:    mv sp, a0
+; RV64IZCMP-NEXT:    call callee_void@plt
+; RV64IZCMP-NEXT:    li a0, 1
+; RV64IZCMP-NEXT:    addi sp, s0, -16
+; RV64IZCMP-NEXT:    cm.popret {ra, s0}, 16
+;
+; RV32I-LABEL: pushpopret1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    .cfi_offset s0, -8
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    .cfi_def_cfa s0, 0
+; RV32I-NEXT:    addi a0, a0, 15
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    mv sp, a0
+; RV32I-NEXT:    call callee_void@plt
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    addi sp, s0, -16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: pushpopret1:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    .cfi_offset s0, -16
+; RV64I-NEXT:    addi s0, sp, 16
+; RV64I-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    andi a0, a0, -16
+; RV64I-NEXT:    sub a0, sp, a0
+; RV64I-NEXT:    mv sp, a0
+; RV64I-NEXT:    call callee_void@plt
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    addi sp, s0, -16
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+entry:
+  %0 = alloca i8, i32 %size, align 16
+  call void @callee_void(i8* nonnull %0)
+  ret i32 1
+}
+; Variable-sized alloca passed to @callee_void, returns -1; Zcmp runs expect li a0, -1 followed by cm.popret.
+define dso_local i32 @pushpopretneg1(i32 signext %size) local_unnamed_addr #0 {
+; RV32IZCMP-LABEL: pushpopretneg1:
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV32IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV32IZCMP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-NEXT:    .cfi_offset s0, -8
+; RV32IZCMP-NEXT:    addi s0, sp, 16
+; RV32IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV32IZCMP-NEXT:    addi a0, a0, 15
+; RV32IZCMP-NEXT:    andi a0, a0, -16
+; RV32IZCMP-NEXT:    sub a0, sp, a0
+; RV32IZCMP-NEXT:    mv sp, a0
+; RV32IZCMP-NEXT:    call callee_void@plt
+; RV32IZCMP-NEXT:    li a0, -1
+; RV32IZCMP-NEXT:    addi sp, s0, -16
+; RV32IZCMP-NEXT:    cm.popret {ra, s0}, 16
+;
+; RV64IZCMP-LABEL: pushpopretneg1:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV64IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV64IZCMP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-NEXT:    .cfi_offset s0, -16
+; RV64IZCMP-NEXT:    addi s0, sp, 16
+; RV64IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV64IZCMP-NEXT:    slli a0, a0, 32
+; RV64IZCMP-NEXT:    srli a0, a0, 32
+; RV64IZCMP-NEXT:    addi a0, a0, 15
+; RV64IZCMP-NEXT:    andi a0, a0, -16
+; RV64IZCMP-NEXT:    sub a0, sp, a0
+; RV64IZCMP-NEXT:    mv sp, a0
+; RV64IZCMP-NEXT:    call callee_void@plt
+; RV64IZCMP-NEXT:    li a0, -1
+; RV64IZCMP-NEXT:    addi sp, s0, -16
+; RV64IZCMP-NEXT:    cm.popret {ra, s0}, 16
+;
+; RV32I-LABEL: pushpopretneg1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    .cfi_offset s0, -8
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    .cfi_def_cfa s0, 0
+; RV32I-NEXT:    addi a0, a0, 15
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    mv sp, a0
+; RV32I-NEXT:    call callee_void@plt
+; RV32I-NEXT:    li a0, -1
+; RV32I-NEXT:    addi sp, s0, -16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: pushpopretneg1:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    .cfi_offset s0, -16
+; RV64I-NEXT:    addi s0, sp, 16
+; RV64I-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    andi a0, a0, -16
+; RV64I-NEXT:    sub a0, sp, a0
+; RV64I-NEXT:    mv sp, a0
+; RV64I-NEXT:    call callee_void@plt
+; RV64I-NEXT:    li a0, -1
+; RV64I-NEXT:    addi sp, s0, -16
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+entry:
+  %0 = alloca i8, i32 %size, align 16
+  call void @callee_void(i8* nonnull %0)
+  ret i32 -1
+}
+; Variable-sized alloca passed to @callee_void, returns 2; Zcmp runs expect li a0, 2 followed by cm.popret.
+define dso_local i32 @pushpopret2(i32 signext %size) local_unnamed_addr #0 {
+; RV32IZCMP-LABEL: pushpopret2:
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV32IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV32IZCMP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-NEXT:    .cfi_offset s0, -8
+; RV32IZCMP-NEXT:    addi s0, sp, 16
+; RV32IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV32IZCMP-NEXT:    addi a0, a0, 15
+; RV32IZCMP-NEXT:    andi a0, a0, -16
+; RV32IZCMP-NEXT:    sub a0, sp, a0
+; RV32IZCMP-NEXT:    mv sp, a0
+; RV32IZCMP-NEXT:    call callee_void@plt
+; RV32IZCMP-NEXT:    li a0, 2
+; RV32IZCMP-NEXT:    addi sp, s0, -16
+; RV32IZCMP-NEXT:    cm.popret {ra, s0}, 16
+;
+; RV64IZCMP-LABEL: pushpopret2:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV64IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV64IZCMP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-NEXT:    .cfi_offset s0, -16
+; RV64IZCMP-NEXT:    addi s0, sp, 16
+; RV64IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV64IZCMP-NEXT:    slli a0, a0, 32
+; RV64IZCMP-NEXT:    srli a0, a0, 32
+; RV64IZCMP-NEXT:    addi a0, a0, 15
+; RV64IZCMP-NEXT:    andi a0, a0, -16
+; RV64IZCMP-NEXT:    sub a0, sp, a0
+; RV64IZCMP-NEXT:    mv sp, a0
+; RV64IZCMP-NEXT:    call callee_void@plt
+; RV64IZCMP-NEXT:    li a0, 2
+; RV64IZCMP-NEXT:    addi sp, s0, -16
+; RV64IZCMP-NEXT:    cm.popret {ra, s0}, 16
+;
+; RV32I-LABEL: pushpopret2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    .cfi_offset s0, -8
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    .cfi_def_cfa s0, 0
+; RV32I-NEXT:    addi a0, a0, 15
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    mv sp, a0
+; RV32I-NEXT:    call callee_void@plt
+; RV32I-NEXT:    li a0, 2
+; RV32I-NEXT:    addi sp, s0, -16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: pushpopret2:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    .cfi_offset s0, -16
+; RV64I-NEXT:    addi s0, sp, 16
+; RV64I-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    andi a0, a0, -16
+; RV64I-NEXT:    sub a0, sp, a0
+; RV64I-NEXT:    mv sp, a0
+; RV64I-NEXT:    call callee_void@plt
+; RV64I-NEXT:    li a0, 2
+; RV64I-NEXT:    addi sp, s0, -16
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+entry:
+  %0 = alloca i8, i32 %size, align 16
+  call void @callee_void(i8* nonnull %0)
+  ret i32 2
+}
+; Variable-sized alloca passed to a tail call of @callee; Zcmp runs expect cm.pop followed by the tail jump.
+define dso_local i32 @tailcall(i32 signext %size) local_unnamed_addr #0 {
+; RV32IZCMP-LABEL: tailcall:
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV32IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV32IZCMP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-NEXT:    .cfi_offset s0, -8
+; RV32IZCMP-NEXT:    addi s0, sp, 16
+; RV32IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV32IZCMP-NEXT:    addi a0, a0, 15
+; RV32IZCMP-NEXT:    andi a0, a0, -16
+; RV32IZCMP-NEXT:    sub a0, sp, a0
+; RV32IZCMP-NEXT:    mv sp, a0
+; RV32IZCMP-NEXT:    addi sp, s0, -16
+; RV32IZCMP-NEXT:    cm.pop {ra, s0}, 16
+; RV32IZCMP-NEXT:    tail callee@plt
+;
+; RV64IZCMP-LABEL: tailcall:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 16
+; RV64IZCMP-NEXT:    cm.push {ra, s0}, -16
+; RV64IZCMP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-NEXT:    .cfi_offset s0, -16
+; RV64IZCMP-NEXT:    addi s0, sp, 16
+; RV64IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV64IZCMP-NEXT:    slli a0, a0, 32
+; RV64IZCMP-NEXT:    srli a0, a0, 32
+; RV64IZCMP-NEXT:    addi a0, a0, 15
+; RV64IZCMP-NEXT:    andi a0, a0, -16
+; RV64IZCMP-NEXT:    sub a0, sp, a0
+; RV64IZCMP-NEXT:    mv sp, a0
+; RV64IZCMP-NEXT:    addi sp, s0, -16
+; RV64IZCMP-NEXT:    cm.pop {ra, s0}, 16
+; RV64IZCMP-NEXT:    tail callee@plt
+;
+; RV32I-LABEL: tailcall:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    .cfi_offset s0, -8
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    .cfi_def_cfa s0, 0
+; RV32I-NEXT:    addi a0, a0, 15
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    mv sp, a0
+; RV32I-NEXT:    addi sp, s0, -16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    tail callee@plt
+;
+; RV64I-LABEL: tailcall:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    .cfi_offset s0, -16
+; RV64I-NEXT:    addi s0, sp, 16
+; RV64I-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    andi a0, a0, -16
+; RV64I-NEXT:    sub a0, sp, a0
+; RV64I-NEXT:    mv sp, a0
+; RV64I-NEXT:    addi sp, s0, -16
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    tail callee@plt
+entry:
+  %0 = alloca i8, i32 %size, align 16
+  %1 = tail call i32 @callee(i8* nonnull %0)
+  ret i32 %1
+}
+; @var is loaded before the @callee_void call and stored back (volatile) after it, then @callee is tail-called; Zcmp runs expect cm.push/cm.pop of {ra, s0-s8}.
+@var = global [5 x i32] zeroinitializer
+define dso_local i32 @nocompress(i32 signext %size) local_unnamed_addr #0 {
+; RV32IZCMP-LABEL: nocompress:
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 48
+; RV32IZCMP-NEXT:    cm.push {ra, s0-s8}, -48
+; RV32IZCMP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-NEXT:    .cfi_offset s0, -8
+; RV32IZCMP-NEXT:    .cfi_offset s1, -12
+; RV32IZCMP-NEXT:    .cfi_offset s2, -16
+; RV32IZCMP-NEXT:    .cfi_offset s3, -20
+; RV32IZCMP-NEXT:    .cfi_offset s4, -24
+; RV32IZCMP-NEXT:    .cfi_offset s5, -28
+; RV32IZCMP-NEXT:    .cfi_offset s6, -32
+; RV32IZCMP-NEXT:    .cfi_offset s7, -36
+; RV32IZCMP-NEXT:    .cfi_offset s8, -40
+; RV32IZCMP-NEXT:    addi s0, sp, 48
+; RV32IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV32IZCMP-NEXT:    addi a0, a0, 15
+; RV32IZCMP-NEXT:    andi a0, a0, -16
+; RV32IZCMP-NEXT:    sub s1, sp, a0
+; RV32IZCMP-NEXT:    mv sp, s1
+; RV32IZCMP-NEXT:    lui s2, %hi(var)
+; RV32IZCMP-NEXT:    lw s3, %lo(var)(s2)
+; RV32IZCMP-NEXT:    lw s4, %lo(var+4)(s2)
+; RV32IZCMP-NEXT:    lw s5, %lo(var+8)(s2)
+; RV32IZCMP-NEXT:    lw s6, %lo(var+12)(s2)
+; RV32IZCMP-NEXT:    addi s7, s2, %lo(var)
+; RV32IZCMP-NEXT:    lw s8, 16(s7)
+; RV32IZCMP-NEXT:    mv a0, s1
+; RV32IZCMP-NEXT:    call callee_void@plt
+; RV32IZCMP-NEXT:    sw s8, 16(s7)
+; RV32IZCMP-NEXT:    sw s6, %lo(var+12)(s2)
+; RV32IZCMP-NEXT:    sw s5, %lo(var+8)(s2)
+; RV32IZCMP-NEXT:    sw s4, %lo(var+4)(s2)
+; RV32IZCMP-NEXT:    sw s3, %lo(var)(s2)
+; RV32IZCMP-NEXT:    mv a0, s1
+; RV32IZCMP-NEXT:    addi sp, s0, -48
+; RV32IZCMP-NEXT:    cm.pop {ra, s0-s8}, 48
+; RV32IZCMP-NEXT:    tail callee@plt
+;
+; RV64IZCMP-LABEL: nocompress:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 80
+; RV64IZCMP-NEXT:    cm.push {ra, s0-s8}, -80
+; RV64IZCMP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-NEXT:    .cfi_offset s0, -16
+; RV64IZCMP-NEXT:    .cfi_offset s1, -24
+; RV64IZCMP-NEXT:    .cfi_offset s2, -32
+; RV64IZCMP-NEXT:    .cfi_offset s3, -40
+; RV64IZCMP-NEXT:    .cfi_offset s4, -48
+; RV64IZCMP-NEXT:    .cfi_offset s5, -56
+; RV64IZCMP-NEXT:    .cfi_offset s6, -64
+; RV64IZCMP-NEXT:    .cfi_offset s7, -72
+; RV64IZCMP-NEXT:    .cfi_offset s8, -80
+; RV64IZCMP-NEXT:    addi s0, sp, 80
+; RV64IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV64IZCMP-NEXT:    slli a0, a0, 32
+; RV64IZCMP-NEXT:    srli a0, a0, 32
+; RV64IZCMP-NEXT:    addi a0, a0, 15
+; RV64IZCMP-NEXT:    andi a0, a0, -16
+; RV64IZCMP-NEXT:    sub s1, sp, a0
+; RV64IZCMP-NEXT:    mv sp, s1
+; RV64IZCMP-NEXT:    lui s2, %hi(var)
+; RV64IZCMP-NEXT:    lw s3, %lo(var)(s2)
+; RV64IZCMP-NEXT:    lw s4, %lo(var+4)(s2)
+; RV64IZCMP-NEXT:    lw s5, %lo(var+8)(s2)
+; RV64IZCMP-NEXT:    lw s6, %lo(var+12)(s2)
+; RV64IZCMP-NEXT:    addi s7, s2, %lo(var)
+; RV64IZCMP-NEXT:    lw s8, 16(s7)
+; RV64IZCMP-NEXT:    mv a0, s1
+; RV64IZCMP-NEXT:    call callee_void@plt
+; RV64IZCMP-NEXT:    sw s8, 16(s7)
+; RV64IZCMP-NEXT:    sw s6, %lo(var+12)(s2)
+; RV64IZCMP-NEXT:    sw s5, %lo(var+8)(s2)
+; RV64IZCMP-NEXT:    sw s4, %lo(var+4)(s2)
+; RV64IZCMP-NEXT:    sw s3, %lo(var)(s2)
+; RV64IZCMP-NEXT:    mv a0, s1
+; RV64IZCMP-NEXT:    addi sp, s0, -80
+; RV64IZCMP-NEXT:    cm.pop {ra, s0-s8}, 80
+; RV64IZCMP-NEXT:    tail callee@plt
+;
+; RV32I-LABEL: nocompress:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    .cfi_def_cfa_offset 48
+; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    .cfi_offset s0, -8
+; RV32I-NEXT:    .cfi_offset s1, -12
+; RV32I-NEXT:    .cfi_offset s2, -16
+; RV32I-NEXT:    .cfi_offset s3, -20
+; RV32I-NEXT:    .cfi_offset s4, -24
+; RV32I-NEXT:    .cfi_offset s5, -28
+; RV32I-NEXT:    .cfi_offset s6, -32
+; RV32I-NEXT:    .cfi_offset s7, -36
+; RV32I-NEXT:    .cfi_offset s8, -40
+; RV32I-NEXT:    addi s0, sp, 48
+; RV32I-NEXT:    .cfi_def_cfa s0, 0
+; RV32I-NEXT:    addi a0, a0, 15
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sub s1, sp, a0
+; RV32I-NEXT:    mv sp, s1
+; RV32I-NEXT:    lui s2, %hi(var)
+; RV32I-NEXT:    lw s3, %lo(var)(s2)
+; RV32I-NEXT:    lw s4, %lo(var+4)(s2)
+; RV32I-NEXT:    lw s5, %lo(var+8)(s2)
+; RV32I-NEXT:    lw s6, %lo(var+12)(s2)
+; RV32I-NEXT:    addi s7, s2, %lo(var)
+; RV32I-NEXT:    lw s8, 16(s7)
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    call callee_void@plt
+; RV32I-NEXT:    sw s8, 16(s7)
+; RV32I-NEXT:    sw s6, %lo(var+12)(s2)
+; RV32I-NEXT:    sw s5, %lo(var+8)(s2)
+; RV32I-NEXT:    sw s4, %lo(var+4)(s2)
+; RV32I-NEXT:    sw s3, %lo(var)(s2)
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    addi sp, s0, -48
+; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    tail callee@plt
+;
+; RV64I-LABEL: nocompress:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -80
+; RV64I-NEXT:    .cfi_def_cfa_offset 80
+; RV64I-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    .cfi_offset s0, -16
+; RV64I-NEXT:    .cfi_offset s1, -24
+; RV64I-NEXT:    .cfi_offset s2, -32
+; RV64I-NEXT:    .cfi_offset s3, -40
+; RV64I-NEXT:    .cfi_offset s4, -48
+; RV64I-NEXT:    .cfi_offset s5, -56
+; RV64I-NEXT:    .cfi_offset s6, -64
+; RV64I-NEXT:    .cfi_offset s7, -72
+; RV64I-NEXT:    .cfi_offset s8, -80
+; RV64I-NEXT:    addi s0, sp, 80
+; RV64I-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    andi a0, a0, -16
+; RV64I-NEXT:    sub s1, sp, a0
+; RV64I-NEXT:    mv sp, s1
+; RV64I-NEXT:    lui s2, %hi(var)
+; RV64I-NEXT:    lw s3, %lo(var)(s2)
+; RV64I-NEXT:    lw s4, %lo(var+4)(s2)
+; RV64I-NEXT:    lw s5, %lo(var+8)(s2)
+; RV64I-NEXT:    lw s6, %lo(var+12)(s2)
+; RV64I-NEXT:    addi s7, s2, %lo(var)
+; RV64I-NEXT:    lw s8, 16(s7)
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    call callee_void@plt
+; RV64I-NEXT:    sw s8, 16(s7)
+; RV64I-NEXT:    sw s6, %lo(var+12)(s2)
+; RV64I-NEXT:    sw s5, %lo(var+8)(s2)
+; RV64I-NEXT:    sw s4, %lo(var+4)(s2)
+; RV64I-NEXT:    sw s3, %lo(var)(s2)
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    addi sp, s0, -80
+; RV64I-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 80
+; RV64I-NEXT:    tail callee@plt
+entry:
+  %0 = alloca i8, i32 %size, align 16
+  %val = load [5 x i32], [5 x i32]* @var
+  call void @callee_void(i8* nonnull %0)
+  store volatile [5 x i32] %val, [5 x i32]* @var
+  %1 = tail call i32 @callee(i8* nonnull %0)
+  ret i32 %1
+}
+
+declare void @callee_void(i8*)
+declare i32 @callee(i8*)
+
+declare i32 @foo_test_irq(...)
+@var_test_irq = global [32 x i32] zeroinitializer
+
+define void @foo_with_irq() nounwind "interrupt"="user" {
+; RV32IZCMP-LABEL: foo_with_irq:
+; RV32IZCMP:       # %bb.0:
+; RV32IZCMP-NEXT:    cm.push {ra}, -64
+; RV32IZCMP-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t1, 52(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t2, 48(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a6, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a7, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    call foo_test_irq@plt
+; RV32IZCMP-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    cm.pop {ra}, 64
+; RV32IZCMP-NEXT:    uret
+;
+; RV64IZCMP-LABEL: foo_with_irq:
+; RV64IZCMP:       # %bb.0:
+; RV64IZCMP-NEXT:    cm.push {ra}, -64
+; RV64IZCMP-NEXT:    addi sp, sp, -64
+; RV64IZCMP-NEXT:    sd t0, 116(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t1, 108(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t2, 100(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a0, 92(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a1, 84(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a2, 76(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a3, 68(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a4, 60(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a5, 52(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a6, 44(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a7, 36(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t3, 28(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t4, 20(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t5, 12(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t6, 4(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    call foo_test_irq@plt
+; RV64IZCMP-NEXT:    ld t6, 4(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t5, 12(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t4, 20(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t3, 28(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a7, 36(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a6, 44(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a5, 52(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a4, 60(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a3, 68(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a2, 76(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a1, 84(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a0, 92(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t2, 100(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t1, 108(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t0, 116(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    addi sp, sp, 64
+; RV64IZCMP-NEXT:    cm.pop {ra}, 64
+; RV64IZCMP-NEXT:    uret
+;
+; RV32I-LABEL: foo_with_irq:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -64
+; RV32I-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t1, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t2, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a6, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a7, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call foo_test_irq@plt
+; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 64
+; RV32I-NEXT:    uret
+;
+; RV64I-LABEL: foo_with_irq:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -128
+; RV64I-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t1, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t2, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a1, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a2, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a4, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a5, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a6, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a7, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t3, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t4, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t5, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t6, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call foo_test_irq@plt
+; RV64I-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t0, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t1, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t2, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a3, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a4, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a5, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a6, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a7, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t3, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t4, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t5, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t6, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 128
+; RV64I-NEXT:    uret
+  %call = call i32 bitcast (i32 (...)* @foo_test_irq to i32 ()*)()
+  ret void
+}
+
+define void @foo_no_irq() nounwind{ ; No "interrupt" attribute here: every prefix below expects only ra to be saved around the single call, ending in cm.popret or ret (no uret).
+; RV32IZCMP-LABEL: foo_no_irq:
+; RV32IZCMP:       # %bb.0:
+; RV32IZCMP-NEXT:    cm.push {ra}, -16
+; RV32IZCMP-NEXT:    call foo_test_irq@plt
+; RV32IZCMP-NEXT:    cm.popret {ra}, 16
+;
+; RV64IZCMP-LABEL: foo_no_irq:
+; RV64IZCMP:       # %bb.0:
+; RV64IZCMP-NEXT:    cm.push {ra}, -16
+; RV64IZCMP-NEXT:    call foo_test_irq@plt
+; RV64IZCMP-NEXT:    cm.popret {ra}, 16
+;
+; RV32I-LABEL: foo_no_irq:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call foo_test_irq@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: foo_no_irq:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call foo_test_irq@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %call = call i32 bitcast (i32 (...)* @foo_test_irq to i32 ()*)() ; the i32 result %call is never used
+  ret void
+}
+
+define void @callee_with_irq() nounwind "interrupt"="user" { ; User-level interrupt handler: all prefixes below end in uret; the *IZCMP prefixes save ra and s0-s11 via cm.push/cm.pop and spill t0-t6 and a0-a7 with individual sw/sd.
+; RV32IZCMP-LABEL: callee_with_irq:
+; RV32IZCMP:       # %bb.0:
+; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -112
+; RV32IZCMP-NEXT:    addi sp, sp, -32
+; RV32IZCMP-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t1, 84(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t2, 80(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a1, 72(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a3, 64(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a4, 60(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a5, 56(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a6, 52(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a7, 48(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t3, 44(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t4, 40(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t5, 36(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t6, 32(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lui a7, %hi(var_test_irq)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a7)
+; RV32IZCMP-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a7)
+; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a7)
+; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a7)
+; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    addi a5, a7, %lo(var_test_irq)
+; RV32IZCMP-NEXT:    lw a0, 16(a5)
+; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 20(a5)
+; RV32IZCMP-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw t0, 24(a5)
+; RV32IZCMP-NEXT:    lw t1, 28(a5)
+; RV32IZCMP-NEXT:    lw t2, 32(a5)
+; RV32IZCMP-NEXT:    lw t3, 36(a5)
+; RV32IZCMP-NEXT:    lw t4, 40(a5)
+; RV32IZCMP-NEXT:    lw t5, 44(a5)
+; RV32IZCMP-NEXT:    lw t6, 48(a5)
+; RV32IZCMP-NEXT:    lw s0, 52(a5)
+; RV32IZCMP-NEXT:    lw s1, 56(a5)
+; RV32IZCMP-NEXT:    lw s2, 60(a5)
+; RV32IZCMP-NEXT:    lw s3, 64(a5)
+; RV32IZCMP-NEXT:    lw s4, 68(a5)
+; RV32IZCMP-NEXT:    lw s5, 72(a5)
+; RV32IZCMP-NEXT:    lw s6, 76(a5)
+; RV32IZCMP-NEXT:    lw s7, 80(a5)
+; RV32IZCMP-NEXT:    lw s8, 84(a5)
+; RV32IZCMP-NEXT:    lw s9, 88(a5)
+; RV32IZCMP-NEXT:    lw s10, 92(a5)
+; RV32IZCMP-NEXT:    lw s11, 96(a5)
+; RV32IZCMP-NEXT:    lw ra, 100(a5)
+; RV32IZCMP-NEXT:    lw a6, 104(a5)
+; RV32IZCMP-NEXT:    lw a4, 108(a5)
+; RV32IZCMP-NEXT:    lw a0, 124(a5)
+; RV32IZCMP-NEXT:    lw a1, 120(a5)
+; RV32IZCMP-NEXT:    lw a2, 116(a5)
+; RV32IZCMP-NEXT:    lw a3, 112(a5)
+; RV32IZCMP-NEXT:    sw a0, 124(a5)
+; RV32IZCMP-NEXT:    sw a1, 120(a5)
+; RV32IZCMP-NEXT:    sw a2, 116(a5)
+; RV32IZCMP-NEXT:    sw a3, 112(a5)
+; RV32IZCMP-NEXT:    sw a4, 108(a5)
+; RV32IZCMP-NEXT:    sw a6, 104(a5)
+; RV32IZCMP-NEXT:    sw ra, 100(a5)
+; RV32IZCMP-NEXT:    sw s11, 96(a5)
+; RV32IZCMP-NEXT:    sw s10, 92(a5)
+; RV32IZCMP-NEXT:    sw s9, 88(a5)
+; RV32IZCMP-NEXT:    sw s8, 84(a5)
+; RV32IZCMP-NEXT:    sw s7, 80(a5)
+; RV32IZCMP-NEXT:    sw s6, 76(a5)
+; RV32IZCMP-NEXT:    sw s5, 72(a5)
+; RV32IZCMP-NEXT:    sw s4, 68(a5)
+; RV32IZCMP-NEXT:    sw s3, 64(a5)
+; RV32IZCMP-NEXT:    sw s2, 60(a5)
+; RV32IZCMP-NEXT:    sw s1, 56(a5)
+; RV32IZCMP-NEXT:    sw s0, 52(a5)
+; RV32IZCMP-NEXT:    sw t6, 48(a5)
+; RV32IZCMP-NEXT:    sw t5, 44(a5)
+; RV32IZCMP-NEXT:    sw t4, 40(a5)
+; RV32IZCMP-NEXT:    sw t3, 36(a5)
+; RV32IZCMP-NEXT:    sw t2, 32(a5)
+; RV32IZCMP-NEXT:    sw t1, 28(a5)
+; RV32IZCMP-NEXT:    sw t0, 24(a5)
+; RV32IZCMP-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 20(a5)
+; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 16(a5)
+; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a7)
+; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a7)
+; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a7)
+; RV32IZCMP-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a7)
+; RV32IZCMP-NEXT:    lw t6, 32(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t5, 36(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t4, 40(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t3, 44(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a7, 48(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a6, 52(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a5, 56(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a4, 60(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a3, 64(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a2, 68(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a1, 72(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t2, 80(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t1, 84(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t0, 88(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    addi sp, sp, 32
+; RV32IZCMP-NEXT:    cm.pop {ra, s0-s11}, 112
+; RV32IZCMP-NEXT:    uret
+;
+; RV64IZCMP-LABEL: callee_with_irq:
+; RV64IZCMP:       # %bb.0:
+; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
+; RV64IZCMP-NEXT:    addi sp, sp, -112
+; RV64IZCMP-NEXT:    sd t0, 212(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t1, 204(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t2, 196(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a0, 188(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a1, 180(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a2, 172(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a3, 164(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a4, 156(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a5, 148(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a6, 140(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a7, 132(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t3, 124(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t4, 116(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t5, 108(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t6, 100(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lui a7, %hi(var_test_irq)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a7)
+; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a7)
+; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a7)
+; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a7)
+; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    addi a5, a7, %lo(var_test_irq)
+; RV64IZCMP-NEXT:    lw a0, 16(a5)
+; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 20(a5)
+; RV64IZCMP-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw t0, 24(a5)
+; RV64IZCMP-NEXT:    lw t1, 28(a5)
+; RV64IZCMP-NEXT:    lw t2, 32(a5)
+; RV64IZCMP-NEXT:    lw t3, 36(a5)
+; RV64IZCMP-NEXT:    lw t4, 40(a5)
+; RV64IZCMP-NEXT:    lw t5, 44(a5)
+; RV64IZCMP-NEXT:    lw t6, 48(a5)
+; RV64IZCMP-NEXT:    lw s0, 52(a5)
+; RV64IZCMP-NEXT:    lw s1, 56(a5)
+; RV64IZCMP-NEXT:    lw s2, 60(a5)
+; RV64IZCMP-NEXT:    lw s3, 64(a5)
+; RV64IZCMP-NEXT:    lw s4, 68(a5)
+; RV64IZCMP-NEXT:    lw s5, 72(a5)
+; RV64IZCMP-NEXT:    lw s6, 76(a5)
+; RV64IZCMP-NEXT:    lw s7, 80(a5)
+; RV64IZCMP-NEXT:    lw s8, 84(a5)
+; RV64IZCMP-NEXT:    lw s9, 88(a5)
+; RV64IZCMP-NEXT:    lw s10, 92(a5)
+; RV64IZCMP-NEXT:    lw s11, 96(a5)
+; RV64IZCMP-NEXT:    lw ra, 100(a5)
+; RV64IZCMP-NEXT:    lw a6, 104(a5)
+; RV64IZCMP-NEXT:    lw a4, 108(a5)
+; RV64IZCMP-NEXT:    lw a0, 124(a5)
+; RV64IZCMP-NEXT:    lw a1, 120(a5)
+; RV64IZCMP-NEXT:    lw a2, 116(a5)
+; RV64IZCMP-NEXT:    lw a3, 112(a5)
+; RV64IZCMP-NEXT:    sw a0, 124(a5)
+; RV64IZCMP-NEXT:    sw a1, 120(a5)
+; RV64IZCMP-NEXT:    sw a2, 116(a5)
+; RV64IZCMP-NEXT:    sw a3, 112(a5)
+; RV64IZCMP-NEXT:    sw a4, 108(a5)
+; RV64IZCMP-NEXT:    sw a6, 104(a5)
+; RV64IZCMP-NEXT:    sw ra, 100(a5)
+; RV64IZCMP-NEXT:    sw s11, 96(a5)
+; RV64IZCMP-NEXT:    sw s10, 92(a5)
+; RV64IZCMP-NEXT:    sw s9, 88(a5)
+; RV64IZCMP-NEXT:    sw s8, 84(a5)
+; RV64IZCMP-NEXT:    sw s7, 80(a5)
+; RV64IZCMP-NEXT:    sw s6, 76(a5)
+; RV64IZCMP-NEXT:    sw s5, 72(a5)
+; RV64IZCMP-NEXT:    sw s4, 68(a5)
+; RV64IZCMP-NEXT:    sw s3, 64(a5)
+; RV64IZCMP-NEXT:    sw s2, 60(a5)
+; RV64IZCMP-NEXT:    sw s1, 56(a5)
+; RV64IZCMP-NEXT:    sw s0, 52(a5)
+; RV64IZCMP-NEXT:    sw t6, 48(a5)
+; RV64IZCMP-NEXT:    sw t5, 44(a5)
+; RV64IZCMP-NEXT:    sw t4, 40(a5)
+; RV64IZCMP-NEXT:    sw t3, 36(a5)
+; RV64IZCMP-NEXT:    sw t2, 32(a5)
+; RV64IZCMP-NEXT:    sw t1, 28(a5)
+; RV64IZCMP-NEXT:    sw t0, 24(a5)
+; RV64IZCMP-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 20(a5)
+; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 16(a5)
+; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a7)
+; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a7)
+; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a7)
+; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a7)
+; RV64IZCMP-NEXT:    ld t6, 100(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t5, 108(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t4, 116(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t3, 124(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a7, 132(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a6, 140(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a5, 148(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a4, 156(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a3, 164(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a2, 172(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a1, 180(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a0, 188(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t2, 196(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t1, 204(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t0, 212(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    addi sp, sp, 112
+; RV64IZCMP-NEXT:    cm.pop {ra, s0-s11}, 160
+; RV64IZCMP-NEXT:    uret
+;
+; RV32I-LABEL: callee_with_irq:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -144
+; RV32I-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t0, 136(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t1, 132(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t2, 128(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 124(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 120(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a0, 116(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a1, 112(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a2, 108(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a3, 104(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a4, 100(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a5, 96(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a6, 92(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a7, 88(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 84(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s8, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s9, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s10, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s11, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t3, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t4, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t5, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t6, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a7, %hi(var_test_irq)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq)(a7)
+; RV32I-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+4)(a7)
+; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+8)(a7)
+; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+12)(a7)
+; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    addi a5, a7, %lo(var_test_irq)
+; RV32I-NEXT:    lw a0, 16(a5)
+; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 20(a5)
+; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw t0, 24(a5)
+; RV32I-NEXT:    lw t1, 28(a5)
+; RV32I-NEXT:    lw t2, 32(a5)
+; RV32I-NEXT:    lw t3, 36(a5)
+; RV32I-NEXT:    lw t4, 40(a5)
+; RV32I-NEXT:    lw t5, 44(a5)
+; RV32I-NEXT:    lw t6, 48(a5)
+; RV32I-NEXT:    lw s0, 52(a5)
+; RV32I-NEXT:    lw s1, 56(a5)
+; RV32I-NEXT:    lw s2, 60(a5)
+; RV32I-NEXT:    lw s3, 64(a5)
+; RV32I-NEXT:    lw s4, 68(a5)
+; RV32I-NEXT:    lw s5, 72(a5)
+; RV32I-NEXT:    lw s6, 76(a5)
+; RV32I-NEXT:    lw s7, 80(a5)
+; RV32I-NEXT:    lw s8, 84(a5)
+; RV32I-NEXT:    lw s9, 88(a5)
+; RV32I-NEXT:    lw s10, 92(a5)
+; RV32I-NEXT:    lw s11, 96(a5)
+; RV32I-NEXT:    lw ra, 100(a5)
+; RV32I-NEXT:    lw a6, 104(a5)
+; RV32I-NEXT:    lw a4, 108(a5)
+; RV32I-NEXT:    lw a0, 124(a5)
+; RV32I-NEXT:    lw a1, 120(a5)
+; RV32I-NEXT:    lw a2, 116(a5)
+; RV32I-NEXT:    lw a3, 112(a5)
+; RV32I-NEXT:    sw a0, 124(a5)
+; RV32I-NEXT:    sw a1, 120(a5)
+; RV32I-NEXT:    sw a2, 116(a5)
+; RV32I-NEXT:    sw a3, 112(a5)
+; RV32I-NEXT:    sw a4, 108(a5)
+; RV32I-NEXT:    sw a6, 104(a5)
+; RV32I-NEXT:    sw ra, 100(a5)
+; RV32I-NEXT:    sw s11, 96(a5)
+; RV32I-NEXT:    sw s10, 92(a5)
+; RV32I-NEXT:    sw s9, 88(a5)
+; RV32I-NEXT:    sw s8, 84(a5)
+; RV32I-NEXT:    sw s7, 80(a5)
+; RV32I-NEXT:    sw s6, 76(a5)
+; RV32I-NEXT:    sw s5, 72(a5)
+; RV32I-NEXT:    sw s4, 68(a5)
+; RV32I-NEXT:    sw s3, 64(a5)
+; RV32I-NEXT:    sw s2, 60(a5)
+; RV32I-NEXT:    sw s1, 56(a5)
+; RV32I-NEXT:    sw s0, 52(a5)
+; RV32I-NEXT:    sw t6, 48(a5)
+; RV32I-NEXT:    sw t5, 44(a5)
+; RV32I-NEXT:    sw t4, 40(a5)
+; RV32I-NEXT:    sw t3, 36(a5)
+; RV32I-NEXT:    sw t2, 32(a5)
+; RV32I-NEXT:    sw t1, 28(a5)
+; RV32I-NEXT:    sw t0, 24(a5)
+; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 20(a5)
+; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 16(a5)
+; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+12)(a7)
+; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+8)(a7)
+; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+4)(a7)
+; RV32I-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var_test_irq)(a7)
+; RV32I-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t0, 136(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t1, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t2, 128(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 120(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a0, 116(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a1, 112(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a2, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a3, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a4, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a5, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a6, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a7, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 84(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s10, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s11, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t3, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t4, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t5, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t6, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 144
+; RV32I-NEXT:    uret
+;
+; RV64I-LABEL: callee_with_irq:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -272
+; RV64I-NEXT:    sd ra, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t0, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t1, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t2, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 232(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 224(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a0, 216(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a1, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a2, 200(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a3, 192(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a4, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a5, 176(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a6, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a7, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s9, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s10, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s11, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t3, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t4, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t5, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t6, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a7, %hi(var_test_irq)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq)(a7)
+; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+4)(a7)
+; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+8)(a7)
+; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+12)(a7)
+; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi a5, a7, %lo(var_test_irq)
+; RV64I-NEXT:    lw a0, 16(a5)
+; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 20(a5)
+; RV64I-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw t0, 24(a5)
+; RV64I-NEXT:    lw t1, 28(a5)
+; RV64I-NEXT:    lw t2, 32(a5)
+; RV64I-NEXT:    lw t3, 36(a5)
+; RV64I-NEXT:    lw t4, 40(a5)
+; RV64I-NEXT:    lw t5, 44(a5)
+; RV64I-NEXT:    lw t6, 48(a5)
+; RV64I-NEXT:    lw s0, 52(a5)
+; RV64I-NEXT:    lw s1, 56(a5)
+; RV64I-NEXT:    lw s2, 60(a5)
+; RV64I-NEXT:    lw s3, 64(a5)
+; RV64I-NEXT:    lw s4, 68(a5)
+; RV64I-NEXT:    lw s5, 72(a5)
+; RV64I-NEXT:    lw s6, 76(a5)
+; RV64I-NEXT:    lw s7, 80(a5)
+; RV64I-NEXT:    lw s8, 84(a5)
+; RV64I-NEXT:    lw s9, 88(a5)
+; RV64I-NEXT:    lw s10, 92(a5)
+; RV64I-NEXT:    lw s11, 96(a5)
+; RV64I-NEXT:    lw ra, 100(a5)
+; RV64I-NEXT:    lw a6, 104(a5)
+; RV64I-NEXT:    lw a4, 108(a5)
+; RV64I-NEXT:    lw a0, 124(a5)
+; RV64I-NEXT:    lw a1, 120(a5)
+; RV64I-NEXT:    lw a2, 116(a5)
+; RV64I-NEXT:    lw a3, 112(a5)
+; RV64I-NEXT:    sw a0, 124(a5)
+; RV64I-NEXT:    sw a1, 120(a5)
+; RV64I-NEXT:    sw a2, 116(a5)
+; RV64I-NEXT:    sw a3, 112(a5)
+; RV64I-NEXT:    sw a4, 108(a5)
+; RV64I-NEXT:    sw a6, 104(a5)
+; RV64I-NEXT:    sw ra, 100(a5)
+; RV64I-NEXT:    sw s11, 96(a5)
+; RV64I-NEXT:    sw s10, 92(a5)
+; RV64I-NEXT:    sw s9, 88(a5)
+; RV64I-NEXT:    sw s8, 84(a5)
+; RV64I-NEXT:    sw s7, 80(a5)
+; RV64I-NEXT:    sw s6, 76(a5)
+; RV64I-NEXT:    sw s5, 72(a5)
+; RV64I-NEXT:    sw s4, 68(a5)
+; RV64I-NEXT:    sw s3, 64(a5)
+; RV64I-NEXT:    sw s2, 60(a5)
+; RV64I-NEXT:    sw s1, 56(a5)
+; RV64I-NEXT:    sw s0, 52(a5)
+; RV64I-NEXT:    sw t6, 48(a5)
+; RV64I-NEXT:    sw t5, 44(a5)
+; RV64I-NEXT:    sw t4, 40(a5)
+; RV64I-NEXT:    sw t3, 36(a5)
+; RV64I-NEXT:    sw t2, 32(a5)
+; RV64I-NEXT:    sw t1, 28(a5)
+; RV64I-NEXT:    sw t0, 24(a5)
+; RV64I-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 20(a5)
+; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 16(a5)
+; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+12)(a7)
+; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+8)(a7)
+; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+4)(a7)
+; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var_test_irq)(a7)
+; RV64I-NEXT:    ld ra, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t0, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t2, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a2, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a3, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a5, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a6, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a7, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s10, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s11, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t3, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t4, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t5, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t6, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 272
+; RV64I-NEXT:    uret
+  %val = load [32 x i32], [32 x i32]* @var_test_irq ; reads the whole [32 x i32] global as one aggregate value
+  store volatile [32 x i32] %val, [32 x i32]* @var_test_irq ; volatile store of that same value back to the global
+  ret void ; NOTE(review): the RV64IZCMP checks above use 8-byte sd/ld at offsets such as 212(sp) and 100(sp), which are not multiples of 8 - confirm this RV64 Zcmp frame layout is intended.
+}
+
+define void @callee_no_irq() nounwind{
+; RV32IZCMP-LABEL: callee_no_irq:
+; RV32IZCMP:       # %bb.0:
+; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -80
+; RV32IZCMP-NEXT:    lui a7, %hi(var_test_irq)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a7)
+; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a7)
+; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a7)
+; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a7)
+; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    addi a5, a7, %lo(var_test_irq)
+; RV32IZCMP-NEXT:    lw a0, 16(a5)
+; RV32IZCMP-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a0, 20(a5)
+; RV32IZCMP-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw t0, 24(a5)
+; RV32IZCMP-NEXT:    lw t1, 28(a5)
+; RV32IZCMP-NEXT:    lw t2, 32(a5)
+; RV32IZCMP-NEXT:    lw t3, 36(a5)
+; RV32IZCMP-NEXT:    lw t4, 40(a5)
+; RV32IZCMP-NEXT:    lw t5, 44(a5)
+; RV32IZCMP-NEXT:    lw t6, 48(a5)
+; RV32IZCMP-NEXT:    lw s0, 52(a5)
+; RV32IZCMP-NEXT:    lw s1, 56(a5)
+; RV32IZCMP-NEXT:    lw s2, 60(a5)
+; RV32IZCMP-NEXT:    lw s3, 64(a5)
+; RV32IZCMP-NEXT:    lw s4, 68(a5)
+; RV32IZCMP-NEXT:    lw s5, 72(a5)
+; RV32IZCMP-NEXT:    lw s6, 76(a5)
+; RV32IZCMP-NEXT:    lw s7, 80(a5)
+; RV32IZCMP-NEXT:    lw s8, 84(a5)
+; RV32IZCMP-NEXT:    lw s9, 88(a5)
+; RV32IZCMP-NEXT:    lw s10, 92(a5)
+; RV32IZCMP-NEXT:    lw s11, 96(a5)
+; RV32IZCMP-NEXT:    lw ra, 100(a5)
+; RV32IZCMP-NEXT:    lw a6, 104(a5)
+; RV32IZCMP-NEXT:    lw a4, 108(a5)
+; RV32IZCMP-NEXT:    lw a0, 124(a5)
+; RV32IZCMP-NEXT:    lw a1, 120(a5)
+; RV32IZCMP-NEXT:    lw a2, 116(a5)
+; RV32IZCMP-NEXT:    lw a3, 112(a5)
+; RV32IZCMP-NEXT:    sw a0, 124(a5)
+; RV32IZCMP-NEXT:    sw a1, 120(a5)
+; RV32IZCMP-NEXT:    sw a2, 116(a5)
+; RV32IZCMP-NEXT:    sw a3, 112(a5)
+; RV32IZCMP-NEXT:    sw a4, 108(a5)
+; RV32IZCMP-NEXT:    sw a6, 104(a5)
+; RV32IZCMP-NEXT:    sw ra, 100(a5)
+; RV32IZCMP-NEXT:    sw s11, 96(a5)
+; RV32IZCMP-NEXT:    sw s10, 92(a5)
+; RV32IZCMP-NEXT:    sw s9, 88(a5)
+; RV32IZCMP-NEXT:    sw s8, 84(a5)
+; RV32IZCMP-NEXT:    sw s7, 80(a5)
+; RV32IZCMP-NEXT:    sw s6, 76(a5)
+; RV32IZCMP-NEXT:    sw s5, 72(a5)
+; RV32IZCMP-NEXT:    sw s4, 68(a5)
+; RV32IZCMP-NEXT:    sw s3, 64(a5)
+; RV32IZCMP-NEXT:    sw s2, 60(a5)
+; RV32IZCMP-NEXT:    sw s1, 56(a5)
+; RV32IZCMP-NEXT:    sw s0, 52(a5)
+; RV32IZCMP-NEXT:    sw t6, 48(a5)
+; RV32IZCMP-NEXT:    sw t5, 44(a5)
+; RV32IZCMP-NEXT:    sw t4, 40(a5)
+; RV32IZCMP-NEXT:    sw t3, 36(a5)
+; RV32IZCMP-NEXT:    sw t2, 32(a5)
+; RV32IZCMP-NEXT:    sw t1, 28(a5)
+; RV32IZCMP-NEXT:    sw t0, 24(a5)
+; RV32IZCMP-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 20(a5)
+; RV32IZCMP-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, 16(a5)
+; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a7)
+; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a7)
+; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a7)
+; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a7)
+; RV32IZCMP-NEXT:    cm.popret {ra, s0-s11}, 80
+;
+; RV64IZCMP-LABEL: callee_no_irq:
+; RV64IZCMP:       # %bb.0:
+; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
+; RV64IZCMP-NEXT:    lui a7, %hi(var_test_irq)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a7)
+; RV64IZCMP-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a7)
+; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a7)
+; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a7)
+; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    addi a5, a7, %lo(var_test_irq)
+; RV64IZCMP-NEXT:    lw a0, 16(a5)
+; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a0, 20(a5)
+; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw t0, 24(a5)
+; RV64IZCMP-NEXT:    lw t1, 28(a5)
+; RV64IZCMP-NEXT:    lw t2, 32(a5)
+; RV64IZCMP-NEXT:    lw t3, 36(a5)
+; RV64IZCMP-NEXT:    lw t4, 40(a5)
+; RV64IZCMP-NEXT:    lw t5, 44(a5)
+; RV64IZCMP-NEXT:    lw t6, 48(a5)
+; RV64IZCMP-NEXT:    lw s0, 52(a5)
+; RV64IZCMP-NEXT:    lw s1, 56(a5)
+; RV64IZCMP-NEXT:    lw s2, 60(a5)
+; RV64IZCMP-NEXT:    lw s3, 64(a5)
+; RV64IZCMP-NEXT:    lw s4, 68(a5)
+; RV64IZCMP-NEXT:    lw s5, 72(a5)
+; RV64IZCMP-NEXT:    lw s6, 76(a5)
+; RV64IZCMP-NEXT:    lw s7, 80(a5)
+; RV64IZCMP-NEXT:    lw s8, 84(a5)
+; RV64IZCMP-NEXT:    lw s9, 88(a5)
+; RV64IZCMP-NEXT:    lw s10, 92(a5)
+; RV64IZCMP-NEXT:    lw s11, 96(a5)
+; RV64IZCMP-NEXT:    lw ra, 100(a5)
+; RV64IZCMP-NEXT:    lw a6, 104(a5)
+; RV64IZCMP-NEXT:    lw a4, 108(a5)
+; RV64IZCMP-NEXT:    lw a0, 124(a5)
+; RV64IZCMP-NEXT:    lw a1, 120(a5)
+; RV64IZCMP-NEXT:    lw a2, 116(a5)
+; RV64IZCMP-NEXT:    lw a3, 112(a5)
+; RV64IZCMP-NEXT:    sw a0, 124(a5)
+; RV64IZCMP-NEXT:    sw a1, 120(a5)
+; RV64IZCMP-NEXT:    sw a2, 116(a5)
+; RV64IZCMP-NEXT:    sw a3, 112(a5)
+; RV64IZCMP-NEXT:    sw a4, 108(a5)
+; RV64IZCMP-NEXT:    sw a6, 104(a5)
+; RV64IZCMP-NEXT:    sw ra, 100(a5)
+; RV64IZCMP-NEXT:    sw s11, 96(a5)
+; RV64IZCMP-NEXT:    sw s10, 92(a5)
+; RV64IZCMP-NEXT:    sw s9, 88(a5)
+; RV64IZCMP-NEXT:    sw s8, 84(a5)
+; RV64IZCMP-NEXT:    sw s7, 80(a5)
+; RV64IZCMP-NEXT:    sw s6, 76(a5)
+; RV64IZCMP-NEXT:    sw s5, 72(a5)
+; RV64IZCMP-NEXT:    sw s4, 68(a5)
+; RV64IZCMP-NEXT:    sw s3, 64(a5)
+; RV64IZCMP-NEXT:    sw s2, 60(a5)
+; RV64IZCMP-NEXT:    sw s1, 56(a5)
+; RV64IZCMP-NEXT:    sw s0, 52(a5)
+; RV64IZCMP-NEXT:    sw t6, 48(a5)
+; RV64IZCMP-NEXT:    sw t5, 44(a5)
+; RV64IZCMP-NEXT:    sw t4, 40(a5)
+; RV64IZCMP-NEXT:    sw t3, 36(a5)
+; RV64IZCMP-NEXT:    sw t2, 32(a5)
+; RV64IZCMP-NEXT:    sw t1, 28(a5)
+; RV64IZCMP-NEXT:    sw t0, 24(a5)
+; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 20(a5)
+; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, 16(a5)
+; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a7)
+; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a7)
+; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a7)
+; RV64IZCMP-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a7)
+; RV64IZCMP-NEXT:    cm.popret {ra, s0-s11}, 160
+;
+; RV32I-LABEL: callee_no_irq:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -80
+; RV32I-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s8, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a7, %hi(var_test_irq)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq)(a7)
+; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+4)(a7)
+; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+8)(a7)
+; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+12)(a7)
+; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    addi a5, a7, %lo(var_test_irq)
+; RV32I-NEXT:    lw a0, 16(a5)
+; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 20(a5)
+; RV32I-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw t0, 24(a5)
+; RV32I-NEXT:    lw t1, 28(a5)
+; RV32I-NEXT:    lw t2, 32(a5)
+; RV32I-NEXT:    lw t3, 36(a5)
+; RV32I-NEXT:    lw t4, 40(a5)
+; RV32I-NEXT:    lw t5, 44(a5)
+; RV32I-NEXT:    lw t6, 48(a5)
+; RV32I-NEXT:    lw s0, 52(a5)
+; RV32I-NEXT:    lw s1, 56(a5)
+; RV32I-NEXT:    lw s2, 60(a5)
+; RV32I-NEXT:    lw s3, 64(a5)
+; RV32I-NEXT:    lw s4, 68(a5)
+; RV32I-NEXT:    lw s5, 72(a5)
+; RV32I-NEXT:    lw s6, 76(a5)
+; RV32I-NEXT:    lw s7, 80(a5)
+; RV32I-NEXT:    lw s8, 84(a5)
+; RV32I-NEXT:    lw s9, 88(a5)
+; RV32I-NEXT:    lw s10, 92(a5)
+; RV32I-NEXT:    lw s11, 96(a5)
+; RV32I-NEXT:    lw ra, 100(a5)
+; RV32I-NEXT:    lw a6, 104(a5)
+; RV32I-NEXT:    lw a4, 108(a5)
+; RV32I-NEXT:    lw a0, 124(a5)
+; RV32I-NEXT:    lw a1, 120(a5)
+; RV32I-NEXT:    lw a2, 116(a5)
+; RV32I-NEXT:    lw a3, 112(a5)
+; RV32I-NEXT:    sw a0, 124(a5)
+; RV32I-NEXT:    sw a1, 120(a5)
+; RV32I-NEXT:    sw a2, 116(a5)
+; RV32I-NEXT:    sw a3, 112(a5)
+; RV32I-NEXT:    sw a4, 108(a5)
+; RV32I-NEXT:    sw a6, 104(a5)
+; RV32I-NEXT:    sw ra, 100(a5)
+; RV32I-NEXT:    sw s11, 96(a5)
+; RV32I-NEXT:    sw s10, 92(a5)
+; RV32I-NEXT:    sw s9, 88(a5)
+; RV32I-NEXT:    sw s8, 84(a5)
+; RV32I-NEXT:    sw s7, 80(a5)
+; RV32I-NEXT:    sw s6, 76(a5)
+; RV32I-NEXT:    sw s5, 72(a5)
+; RV32I-NEXT:    sw s4, 68(a5)
+; RV32I-NEXT:    sw s3, 64(a5)
+; RV32I-NEXT:    sw s2, 60(a5)
+; RV32I-NEXT:    sw s1, 56(a5)
+; RV32I-NEXT:    sw s0, 52(a5)
+; RV32I-NEXT:    sw t6, 48(a5)
+; RV32I-NEXT:    sw t5, 44(a5)
+; RV32I-NEXT:    sw t4, 40(a5)
+; RV32I-NEXT:    sw t3, 36(a5)
+; RV32I-NEXT:    sw t2, 32(a5)
+; RV32I-NEXT:    sw t1, 28(a5)
+; RV32I-NEXT:    sw t0, 24(a5)
+; RV32I-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 20(a5)
+; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 16(a5)
+; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+12)(a7)
+; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+8)(a7)
+; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+4)(a7)
+; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var_test_irq)(a7)
+; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 80
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: callee_no_irq:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -160
+; RV64I-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s9, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a7, %hi(var_test_irq)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq)(a7)
+; RV64I-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+4)(a7)
+; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+8)(a7)
+; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+12)(a7)
+; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi a5, a7, %lo(var_test_irq)
+; RV64I-NEXT:    lw a0, 16(a5)
+; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 20(a5)
+; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw t0, 24(a5)
+; RV64I-NEXT:    lw t1, 28(a5)
+; RV64I-NEXT:    lw t2, 32(a5)
+; RV64I-NEXT:    lw t3, 36(a5)
+; RV64I-NEXT:    lw t4, 40(a5)
+; RV64I-NEXT:    lw t5, 44(a5)
+; RV64I-NEXT:    lw t6, 48(a5)
+; RV64I-NEXT:    lw s0, 52(a5)
+; RV64I-NEXT:    lw s1, 56(a5)
+; RV64I-NEXT:    lw s2, 60(a5)
+; RV64I-NEXT:    lw s3, 64(a5)
+; RV64I-NEXT:    lw s4, 68(a5)
+; RV64I-NEXT:    lw s5, 72(a5)
+; RV64I-NEXT:    lw s6, 76(a5)
+; RV64I-NEXT:    lw s7, 80(a5)
+; RV64I-NEXT:    lw s8, 84(a5)
+; RV64I-NEXT:    lw s9, 88(a5)
+; RV64I-NEXT:    lw s10, 92(a5)
+; RV64I-NEXT:    lw s11, 96(a5)
+; RV64I-NEXT:    lw ra, 100(a5)
+; RV64I-NEXT:    lw a6, 104(a5)
+; RV64I-NEXT:    lw a4, 108(a5)
+; RV64I-NEXT:    lw a0, 124(a5)
+; RV64I-NEXT:    lw a1, 120(a5)
+; RV64I-NEXT:    lw a2, 116(a5)
+; RV64I-NEXT:    lw a3, 112(a5)
+; RV64I-NEXT:    sw a0, 124(a5)
+; RV64I-NEXT:    sw a1, 120(a5)
+; RV64I-NEXT:    sw a2, 116(a5)
+; RV64I-NEXT:    sw a3, 112(a5)
+; RV64I-NEXT:    sw a4, 108(a5)
+; RV64I-NEXT:    sw a6, 104(a5)
+; RV64I-NEXT:    sw ra, 100(a5)
+; RV64I-NEXT:    sw s11, 96(a5)
+; RV64I-NEXT:    sw s10, 92(a5)
+; RV64I-NEXT:    sw s9, 88(a5)
+; RV64I-NEXT:    sw s8, 84(a5)
+; RV64I-NEXT:    sw s7, 80(a5)
+; RV64I-NEXT:    sw s6, 76(a5)
+; RV64I-NEXT:    sw s5, 72(a5)
+; RV64I-NEXT:    sw s4, 68(a5)
+; RV64I-NEXT:    sw s3, 64(a5)
+; RV64I-NEXT:    sw s2, 60(a5)
+; RV64I-NEXT:    sw s1, 56(a5)
+; RV64I-NEXT:    sw s0, 52(a5)
+; RV64I-NEXT:    sw t6, 48(a5)
+; RV64I-NEXT:    sw t5, 44(a5)
+; RV64I-NEXT:    sw t4, 40(a5)
+; RV64I-NEXT:    sw t3, 36(a5)
+; RV64I-NEXT:    sw t2, 32(a5)
+; RV64I-NEXT:    sw t1, 28(a5)
+; RV64I-NEXT:    sw t0, 24(a5)
+; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 20(a5)
+; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 16(a5)
+; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+12)(a7)
+; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+8)(a7)
+; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+4)(a7)
+; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var_test_irq)(a7)
+; RV64I-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s10, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 160
+; RV64I-NEXT:    ret
+  %val = load [32 x i32], [32 x i32]* @var_test_irq
+  store volatile [32 x i32] %val, [32 x i32]* @var_test_irq
+  ret void
+}