diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/CodeGen/MIRFormatter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineCombinerPattern.h"
@@ -596,11 +597,14 @@
-  /// NewDestBB. Optionally, insert the clobbered register restoring in \p
-  /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to
-  /// the offset of the position to insert the new branch.
+  /// NewDestBB. Optionally, insert code to restore clobbered registers in
+  /// \p RestoreBB. The target may call \p DeduplicateRestoreBB to reuse a
+  /// previously inserted, identical restore block instead of \p RestoreBB.
+  /// \p BrOffset indicates the offset of \p NewDestBB relative to the
+  /// offset of the position to insert the new branch.
-  virtual void insertIndirectBranch(MachineBasicBlock &MBB,
-                                    MachineBasicBlock &NewDestBB,
-                                    MachineBasicBlock &RestoreBB,
-                                    const DebugLoc &DL, int64_t BrOffset = 0,
-                                    RegScavenger *RS = nullptr) const {
+  virtual void
+  insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+                       MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+                       function_ref<MachineBasicBlock *(MachineBasicBlock *)>
+                           DeduplicateRestoreBB,
+                       int64_t BrOffset = 0, RegScavenger *RS = nullptr) const {
     llvm_unreachable("target did not implement");
   }
 
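Note: a minimal sketch (hypothetical target, not part of this patch) of how an override is expected to use the new DeduplicateRestoreBB callback; buildLongBranch, buildRestoreSequence, and retargetLongBranch are placeholder helpers:

    void MyTargetInstrInfo::insertIndirectBranch(
        MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
        MachineBasicBlock *RestoreBB, const DebugLoc &DL,
        function_ref<MachineBasicBlock *(MachineBasicBlock *)>
            DeduplicateRestoreBB,
        int64_t BrOffset, RegScavenger *RS) const {
      // Emit the long-branch sequence into MBB; suppose it clobbers a
      // register pair that must be restored at the destination.
      buildLongBranch(MBB, DL);             // hypothetical helper
      // Populate RestoreBB with the restore sequence. It must not contain
      // branches: BranchRelaxation compares restore blocks instruction by
      // instruction to find duplicates.
      buildRestoreSequence(*RestoreBB, DL); // hypothetical helper
      // If an identical restore block already exists for NewDestBB, branch
      // there instead; the caller then erases the fresh RestoreBB.
      MachineBasicBlock *BranchDest = DeduplicateRestoreBB(RestoreBB);
      retargetLongBranch(MBB, BranchDest);  // hypothetical helper
    }
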
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -43,8 +43,7 @@
 namespace {
 
 class BranchRelaxation : public MachineFunctionPass {
-  /// BasicBlockInfo - Information about the offset and size of a single
-  /// basic block.
+  /// BasicBlockInfo - Information about a single basic block.
   struct BasicBlockInfo {
     /// Offset - Distance from the beginning of the function to the beginning
     /// of this basic block.
@@ -59,6 +58,13 @@
     /// beginning of the block, or from an aligned jump table at the end.
     unsigned Size = 0;
 
+    /// RestoreBlocks - Restore blocks created for this basic block when it
+    /// is the destination of a far branch.
+    ///
+    /// This is used to deduplicate restore blocks. See deduplicateRestoreBlock
+    /// and fixupUnconditionalBranch for details.
+    SmallVector<MachineBasicBlock *, 1> RestoreBlocks;
+
     BasicBlockInfo() = default;
 
     /// Compute the offset immediately following this block. \p MBB is the next
@@ -74,6 +80,48 @@
       // can't tell whether or not it will insert nops. Assume that it will.
       return alignTo(PO, Alignment) + Alignment.value() - ParentAlign.value();
     }
+
+    /// Returns true if \p OldRestoreBB and \p NewRestoreBB are identical
+    /// restore blocks.
+    static bool isRestoreBlockIdentical(const MachineBasicBlock *OldRestoreBB,
+                                        const MachineBasicBlock *NewRestoreBB) {
+      MachineBasicBlock::const_iterator ItOld = OldRestoreBB->begin(),
+                                        ItEndOld = OldRestoreBB->end(),
+                                        ItNew = NewRestoreBB->begin(),
+                                        ItEndNew = NewRestoreBB->end();
+      while (true) {
+        // Since targets shouldn't place branches in restore blocks, the only
+        // possible branch is the one placed by fixupUnconditionalBranch when
+        // there are multiple different restore blocks: every restore block
+        // except the last acts as PrevBB of its successor and so ends with
+        // an unconditional branch to DestBB. That branch takes no part in
+        // the check of whether two restore blocks are identical.
+        if (ItOld != ItEndOld && ItOld->isBranch())
+          ItEndOld = ItOld;
+        if (ItOld == ItEndOld || ItNew == ItEndNew)
+          return ItOld == ItEndOld && ItNew == ItEndNew;
+        if (!ItOld->isIdenticalTo(*ItNew))
+          return false;
+        ++ItOld;
+        ++ItNew;
+      }
+    }
+
+    /// Iterate over previously inserted restore blocks and return one that
+    /// can be reused, i.e. one the target can branch unconditionally to so
+    /// that fixupUnconditionalBranch can erase \p RestoreBB from the
+    /// function. If no reusable restore block is found, add \p RestoreBB to
+    /// RestoreBlocks and return it.
+    MachineBasicBlock *deduplicateRestoreBlock(MachineBasicBlock *RestoreBB) {
+      assert(!RestoreBB->empty() &&
+             "RestoreBB to be deduplicated should not be empty");
+      for (auto &DeduplicatedBB : RestoreBlocks)
+        if (isRestoreBlockIdentical(DeduplicatedBB, RestoreBB))
+          return DeduplicatedBB;
+
+      RestoreBlocks.push_back(RestoreBB);
+      return RestoreBB;
+    }
   };
 
   SmallVector<BasicBlockInfo, 16> BlockInfo;
@@ -468,42 +516,62 @@
   // be erased.
   MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back());
 
-  TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
-                            DestOffset - SrcOffset, RS.get());
+  // For multiple far branches to the same destination, some restore blocks
+  // can be shared if they clobber the same registers and use the same
+  // restore sequence. We let the target call DeduplicateRestoreBB to find a
+  // previously inserted restore block identical to RestoreBB; if one exists,
+  // the target branches unconditionally to it instead and RestoreBB is
+  // erased from the function.
+  BasicBlockInfo &DestBBInfo = BlockInfo[DestBB->getNumber()];
+  MachineBasicBlock *DeduplicatedBB = RestoreBB;
+  auto DeduplicateRestoreBB = [&DestBBInfo,
+                               &DeduplicatedBB](MachineBasicBlock *RestoreBB) {
+    return DeduplicatedBB = DestBBInfo.deduplicateRestoreBlock(RestoreBB);
+  };
+
+  TII->insertIndirectBranch(*BranchBB, *DestBB, RestoreBB, DL,
+                            DeduplicateRestoreBB, DestOffset - SrcOffset,
+                            RS.get());
 
   BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
   adjustBlockOffsets(*MBB);
 
-  // If RestoreBB is required, try to place just before DestBB.
-  if (!RestoreBB->empty()) {
-    // TODO: For multiple far branches to the same destination, there are
-    // chances that some restore blocks could be shared if they clobber the
-    // same registers and share the same restore sequence. So far, those
-    // restore blocks are just duplicated for each far branch.
-    assert(!DestBB->isEntryBlock());
-    MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
-    if (auto *FT = PrevBB->getFallThrough()) {
-      assert(FT == DestBB);
-      TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
-      // Recalculate the block size.
-      BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
-    }
-    // Now, RestoreBB could be placed directly before DestBB.
-    MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
-    // Update successors and predecessors.
-    RestoreBB->addSuccessor(DestBB);
-    BranchBB->replaceSuccessor(DestBB, RestoreBB);
-    if (TRI->trackLivenessAfterRegAlloc(*MF))
-      computeAndAddLiveIns(LiveRegs, *RestoreBB);
-    // Compute the restore block size.
-    BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
-    // Update the offset starting from the previous block.
-    adjustBlockOffsets(*PrevBB);
-  } else {
-    // Remove restore block if it's not required.
+  if (DeduplicatedBB != RestoreBB)
+    BranchBB->replaceSuccessor(DestBB, DeduplicatedBB);
+  if (DeduplicatedBB != RestoreBB || RestoreBB->empty()) {
+    // Remove RestoreBB if it has been deduplicated or is not required.
     MF->erase(RestoreBB);
+    return true;
   }
 
+  // The restore block should not contain any branches.
+  for (auto &RestoreI : *RestoreBB) {
+    assert(!RestoreI.isBranch() &&
+           "restore blocks should not contain branch instructions");
+  }
+
+  // RestoreBB is required, so try to place it directly before DestBB.
+
+  assert(!DestBB->isEntryBlock());
+  MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+  if (auto *FT = PrevBB->getFallThrough()) {
+    assert(FT == DestBB);
+    TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
+    // Recalculate the block size.
+    BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+  }
+  // Now RestoreBB can be placed directly before DestBB.
+  MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+  // Update successors and predecessors.
+  RestoreBB->addSuccessor(DestBB);
+  BranchBB->replaceSuccessor(DestBB, RestoreBB);
+  if (TRI->trackLivenessAfterRegAlloc(*MF))
+    computeAndAddLiveIns(LiveRegs, *RestoreBB);
+  // Compute the restore block size.
+  BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+  // Update the offset starting from the previous block.
+  adjustBlockOffsets(*PrevBB);
+
   return true;
 }
 
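For clarity, a standalone analogue of isRestoreBlockIdentical, modeling instructions as strings, where a trailing "br" stands for the unconditional branch that fixupUnconditionalBranch may have appended to an earlier restore block while it acted as PrevBB:

    #include <string>
    #include <vector>

    static bool isRestoreIdentical(const std::vector<std::string> &Old,
                                   const std::vector<std::string> &New) {
      size_t OldEnd = Old.size();
      // The old block's trailing branch is not part of the restore sequence,
      // so it is excluded from the comparison.
      if (OldEnd != 0 && Old[OldEnd - 1] == "br")
        --OldEnd;
      if (OldEnd != New.size())
        return false;
      for (size_t I = 0; I != OldEnd; ++I)
        if (Old[I] != New[I])
          return false;
      return true;
    }

Under this rule {restore, br} and {restore} compare equal, which is what lets a later far branch reuse an earlier restore block even after that block has been given a branch to DestBB.
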
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -18,6 +18,7 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIRegisterInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
@@ -277,10 +278,12 @@
 
   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
 
-  void insertIndirectBranch(MachineBasicBlock &MBB,
-                            MachineBasicBlock &NewDestBB,
-                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
-                            int64_t BrOffset, RegScavenger *RS) const override;
+  void
+  insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+                       MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+                       function_ref<MachineBasicBlock *(MachineBasicBlock *)>
+                           DeduplicateRestoreBB,
+                       int64_t BrOffset, RegScavenger *RS) const override;
 
   bool analyzeBranchImpl(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2379,16 +2379,16 @@
   return MI.getOperand(0).getMBB();
 }
 
-void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                       MachineBasicBlock &DestBB,
-                                       MachineBasicBlock &RestoreBB,
-                                       const DebugLoc &DL, int64_t BrOffset,
-                                       RegScavenger *RS) const {
+void SIInstrInfo::insertIndirectBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock &DestBB,
+    MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+    function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB,
+    int64_t BrOffset, RegScavenger *RS) const {
   assert(RS && "RegScavenger required for long branching");
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
   assert(MBB.pred_size() == 1);
-  assert(RestoreBB.empty() &&
+  assert(RestoreBB->empty() &&
          "restore block should be inserted for restoring clobbered registers");
 
   MachineFunction *MF = MBB.getParent();
@@ -2426,12 +2426,6 @@
   BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
     .addReg(PCReg);
 
-  // FIXME: If spilling is necessary, this will fail because this scavenger has
-  // no emergency stack slots. It is non-trivial to spill in this situation,
-  // because the restore code needs to be specially placed after the
-  // jump. BranchRelaxation then needs to be made aware of the newly inserted
-  // block.
-  //
   // If a spill is needed for the pc register pair, we need to insert a spill
   // restore block right before the destination block, and insert a short branch
   // into the old destination block's fallthrough predecessor.
@@ -2462,6 +2456,8 @@
   // dest_bb:
   //   buzz;
 
+  const MachineBasicBlock *BranchDest;
+
   RS->enterBasicBlockEnd(MBB);
   Register Scav = RS->scavengeRegisterBackwards(
       AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
@@ -2470,20 +2466,23 @@
     RS->setRegUsed(Scav);
     MRI.replaceRegWith(PCReg, Scav);
     MRI.clearVirtRegs();
+    BranchDest = &DestBB;
   } else {
     // As SGPR needs VGPR to be spilled, we reuse the slot of temporary VGPR for
     // SGPR spill.
     const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
     const SIRegisterInfo *TRI = ST.getRegisterInfo();
-    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
+
+    TRI->spillEmergencySGPR(GetPC, *RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
+    BranchDest = DeduplicateRestoreBB(RestoreBB);
+
     MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
     MRI.clearVirtRegs();
   }
 
-  MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol();
   // Now, the distance could be defined.
   auto *Offset = MCBinaryExpr::createSub(
-      MCSymbolRefExpr::create(DestLabel, MCCtx),
+      MCSymbolRefExpr::create(BranchDest->getSymbol(), MCCtx),
       MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx);
   // Add offset assignments.
   auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx);
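The chosen BranchDest (DestBB when no spill is needed, otherwise the possibly deduplicated restore block) feeds the PC-relative label arithmetic. A sketch of the expressions that result, assuming MCCtx and PostGetPCLabel as in the surrounding code; the lo/hi split matches the s_add_u32/s_addc_u32 pair emitted earlier:

    const MCExpr *Dist = MCBinaryExpr::createSub(
        MCSymbolRefExpr::create(BranchDest->getSymbol(), MCCtx),
        MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx);
    // Low 32 bits go to s_add_u32, high 32 bits to s_addc_u32.
    const MCExpr *Lo = MCBinaryExpr::createAnd(
        Dist, MCConstantExpr::create(0xFFFFFFFFULL, MCCtx), MCCtx);
    const MCExpr *Hi = MCBinaryExpr::createAShr(
        Dist, MCConstantExpr::create(32, MCCtx), MCCtx);
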
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h
--- a/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_AVR_INSTR_INFO_H
 #define LLVM_AVR_INSTR_INFO_H
 
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 
 #include "AVRRegisterInfo.h"
@@ -107,10 +108,12 @@
   bool isBranchOffsetInRange(unsigned BranchOpc,
                              int64_t BrOffset) const override;
 
-  void insertIndirectBranch(MachineBasicBlock &MBB,
-                            MachineBasicBlock &NewDestBB,
-                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
-                            int64_t BrOffset, RegScavenger *RS) const override;
+  void
+  insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+                       MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+                       function_ref<MachineBasicBlock *(MachineBasicBlock *)>
+                           DeduplicateRestoreBB,
+                       int64_t BrOffset, RegScavenger *RS) const override;
 
 private:
   const AVRRegisterInfo RI;
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -565,11 +565,11 @@
   }
 }
 
-void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                        MachineBasicBlock &NewDestBB,
-                                        MachineBasicBlock &RestoreBB,
-                                        const DebugLoc &DL, int64_t BrOffset,
-                                        RegScavenger *RS) const {
+void AVRInstrInfo::insertIndirectBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+    MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+    function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB,
+    int64_t BrOffset, RegScavenger *RS) const {
   // This method inserts a *direct* branch (JMP), despite its name.
   // LLVM calls this method to fixup unconditional branches; it never calls
   // insertBranch or some hypothetical "insertDirectBranch".
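AVR ignores both RestoreBB and DeduplicateRestoreBB: the direct JMP emitted here clobbers nothing, so the restore block is never populated and BranchRelaxation simply erases the empty RestoreBB. Sketch of the (unchanged) body under the new signature; the JMPk opcode name is assumed from the AVR backend:

    // Emit a direct jump; RestoreBB stays empty, DeduplicateRestoreBB is
    // never called, and the caller erases the empty block.
    BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
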
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -14,6 +14,7 @@
 #define LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H
 
 #include "RISCVRegisterInfo.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 
@@ -94,10 +95,12 @@
                         const DebugLoc &dl,
                         int *BytesAdded = nullptr) const override;
 
-  void insertIndirectBranch(MachineBasicBlock &MBB,
-                            MachineBasicBlock &NewDestBB,
-                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
-                            int64_t BrOffset, RegScavenger *RS) const override;
+  void
+  insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+                       MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+                       function_ref<MachineBasicBlock *(MachineBasicBlock *)>
+                           DeduplicateRestoreBB,
+                       int64_t BrOffset, RegScavenger *RS) const override;
 
   unsigned removeBranch(MachineBasicBlock &MBB,
                         int *BytesRemoved = nullptr) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -951,16 +951,16 @@
   return 2;
 }
 
-void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                          MachineBasicBlock &DestBB,
-                                          MachineBasicBlock &RestoreBB,
-                                          const DebugLoc &DL, int64_t BrOffset,
-                                          RegScavenger *RS) const {
+void RISCVInstrInfo::insertIndirectBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock &DestBB,
+    MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+    function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB,
+    int64_t BrOffset, RegScavenger *RS) const {
   assert(RS && "RegScavenger required for long branching");
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
   assert(MBB.pred_size() == 1);
-  assert(RestoreBB.empty() &&
+  assert(RestoreBB->empty() &&
          "restore block should be inserted for restoring clobbered registers");
 
   MachineFunction *MF = MBB.getParent();
@@ -977,7 +977,7 @@
   // uses the same workaround).
   Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
   auto II = MBB.end();
-  // We may also update the jump target to RestoreBB later.
+  // We may also update the jump target later.
   MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
                           .addReg(ScratchReg, RegState::Define | RegState::Dead)
                           .addMBB(&DestBB, RISCVII::MO_CALL);
@@ -1004,12 +1004,13 @@
     TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
                              /*SpAdj=*/0, /*FIOperandNum=*/1);
 
-    MI.getOperand(1).setMBB(&RestoreBB);
-
-    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
+    loadRegFromStackSlot(*RestoreBB, RestoreBB->end(), TmpGPR, FrameIndex,
                          &RISCV::GPRRegClass, TRI);
-    TRI->eliminateFrameIndex(RestoreBB.back(),
+    TRI->eliminateFrameIndex(RestoreBB->back(),
                              /*SpAdj=*/0, /*FIOperandNum=*/1);
+
+    MachineBasicBlock *JumpDest = DeduplicateRestoreBB(RestoreBB);
+    MI.getOperand(1).setMBB(JumpDest);
   }
 
   MRI.replaceRegWith(ScratchReg, TmpGPR);
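The ordering above matters: the restore sequence is materialized and its frame index eliminated before DeduplicateRestoreBB runs, so repeated far branches to the same destination produce byte-identical restore blocks that the comparison can merge. Condensed, with MI being the PseudoJump built earlier:

    // Fill the restore block first ...
    loadRegFromStackSlot(*RestoreBB, RestoreBB->end(), TmpGPR, FrameIndex,
                         &RISCV::GPRRegClass, TRI);
    TRI->eliminateFrameIndex(RestoreBB->back(), /*SpAdj=*/0,
                             /*FIOperandNum=*/1);
    // ... then pick the final jump target: the fresh RestoreBB, or an
    // identical block created by an earlier far branch to DestBB.
    MI.getOperand(1).setMBB(DeduplicateRestoreBB(RestoreBB));
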
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 -o - %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=7 -o - %s | FileCheck %s
 
 define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 {
 ; CHECK-LABEL: spill:
@@ -344,6 +344,30 @@
 ; CHECK-NEXT:    v_nop_e64
 ; CHECK-NEXT:    v_nop_e64
 ; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
 ; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    s_branch .LBB0_2
 ; CHECK-NEXT:  .LBB0_4: ; %bb3
@@ -773,10 +797,34 @@
   %cmp = icmp eq i32 %cnd, 0
   br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
 
-bb2: ; 68 bytes
-  ; 64 byte asm
+bb2: ; 260 bytes
+  ; 256 byte asm
   call void asm sideeffect
    "v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
     v_nop_e64
     v_nop_e64
     v_nop_e64
@@ -1310,6 +1358,30 @@
 ; CHECK-NEXT:    v_nop_e64
 ; CHECK-NEXT:    v_nop_e64
 ; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
 ; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    s_branch .LBB1_2
 ; CHECK-NEXT:  .LBB1_4: ; %bb3
@@ -1816,10 +1888,2310 @@
   %cmp = icmp eq i32 %cnd, 0
   br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
 
-bb2: ; 68 bytes
-  ; 64 byte asm
+bb2: ; 260 bytes
+  ; 256 byte asm
   call void asm sideeffect
    "v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64",""() #0
+  br label %bb3
+
+bb3:
+  tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0
+  tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0
+  tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0
+  tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0
+  tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0
+  tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0
+  tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0
+  tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0
+  tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0
+  tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0
+  tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0
+  tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0
+  tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0
+  tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0
+  tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0
+  tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0
+  tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0
+  tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0
+  tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0
+  tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0
+  tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0
+  tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0
+  tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0
+  tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0
+  tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0
+  tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0
+  tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0
+  tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0
+  tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0
+  tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0
+  tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0
+  tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0
+  tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0
+  tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0
+  tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0
+  tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0
+  tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0
+  tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0
+  tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0
+  tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0
+  tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0
+  tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0
+  tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0
+  tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0
+  tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0
+  tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0
+  tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0
+  tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0
+  tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0
+  tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0
+  tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0
+  tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0
+  tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0
+  tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0
+  tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0
+  tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0
+  tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0
+  tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0
+  tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0
+  tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0
+  tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0
+  tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0
+  tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0
+  tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0
+  tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0
+  tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0
+  tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0
+  tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0
+  tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0
+  tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0
+  tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0
+  tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0
+  tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0
+  tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0
+  tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0
+  tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0
+  tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0
+  tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0
+  tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0
+  tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0
+  tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0
+  tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0
+  tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0
+  tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0
+  tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0
+  tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0
+  tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0
+  tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0
+  tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0
+  tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0
+  tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0
+  tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0
+  tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0
+  tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0
+  tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0
+  tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0
+  tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0
+  tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0
+  tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0
+  tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0
+  tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0
+  tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0
+  tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0
+  tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0
+  ret void
+}
+
+define amdgpu_kernel void @spill_duplicated_restore_block(i32 addrspace(1)* %arg, i32 %cnd) #0 {
+; CHECK-LABEL: spill_duplicated_restore_block:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_load_dword s100, s[4:5], 0x2
+; CHECK-NEXT:    s_mov_b64 s[98:99], s[2:3]
+; CHECK-NEXT:    s_mov_b64 s[96:97], s[0:1]
+; CHECK-NEXT:    s_add_u32 s96, s96, s7
+; CHECK-NEXT:    s_addc_u32 s97, s97, 0
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_writelane_b32 v0, s0, 0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s0, s100
+; CHECK-NEXT:    s_cmp_eq_u32 s100, 42
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s2, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s3, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s5, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s6, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s7, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s8, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s9, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s10, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s11, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s12, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s13, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s14, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s15, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s16, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s17, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s18, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s19, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s20, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s21, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s22, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s23, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s24, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s25, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s26, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s27, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s28, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s29, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s30, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s31, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s32, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s33, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s34, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s35, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s36, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s37, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s38, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s39, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s40, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s41, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s42, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s43, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s44, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s45, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s46, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s47, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s48, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s49, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s50, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s51, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s52, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s53, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s54, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s55, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s56, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s57, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s58, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s59, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s60, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s61, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s62, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s63, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s64, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s65, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s66, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s67, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s68, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s69, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s70, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s71, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s72, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s73, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s74, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s75, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s76, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s77, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s78, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s79, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s80, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s81, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s82, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s83, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s84, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s85, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s86, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s87, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s88, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s89, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s90, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s91, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s92, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s93, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s94, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s95, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s96, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s97, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s98, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s99, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s100, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s101, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 vcc_lo, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 vcc_hi, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_cbranch_scc0 .LBB2_1
+; CHECK-NEXT:  .LBB2_4: ; %entry
+; CHECK-NEXT:    s_not_b64 exec, exec
+; CHECK-NEXT:    buffer_store_dword v1, off, s[96:99], 0
+; CHECK-NEXT:    v_writelane_b32 v1, s0, 0
+; CHECK-NEXT:    v_writelane_b32 v1, s1, 1
+; CHECK-NEXT:    s_getpc_b64 s[0:1]
+; CHECK-NEXT:  .Lpost_getpc2:
+; CHECK-NEXT:    s_add_u32 s0, s0, (.LBB2_5-.Lpost_getpc2)&4294967295
+; CHECK-NEXT:    s_addc_u32 s1, s1, (.LBB2_5-.Lpost_getpc2)>>32
+; CHECK-NEXT:    s_setpc_b64 s[0:1]
+; CHECK-NEXT:  .LBB2_1: ; %bb4
+; CHECK-NEXT:    s_cmpk_eq_i32 s0, 0x71
+; CHECK-NEXT:    s_cbranch_scc0 .LBB2_2
+; CHECK-NEXT:  .LBB2_6: ; %bb4
+; CHECK-NEXT:    s_not_b64 exec, exec
+; CHECK-NEXT:    buffer_store_dword v1, off, s[96:99], 0
+; CHECK-NEXT:    v_writelane_b32 v1, s0, 0
+; CHECK-NEXT:    v_writelane_b32 v1, s1, 1
+; CHECK-NEXT:    s_getpc_b64 s[0:1]
+; CHECK-NEXT:  .Lpost_getpc3:
+; CHECK-NEXT:    s_add_u32 s0, s0, (.LBB2_5-.Lpost_getpc3)&4294967295
+; CHECK-NEXT:    s_addc_u32 s1, s1, (.LBB2_5-.Lpost_getpc3)>>32
+; CHECK-NEXT:    s_setpc_b64 s[0:1]
+; CHECK-NEXT:  .LBB2_2: ; %bb2
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_branch .LBB2_3
+; CHECK-NEXT:  .LBB2_5: ; %bb3
+; CHECK-NEXT:    v_readlane_b32 s0, v1, 0
+; CHECK-NEXT:    v_readlane_b32 s1, v1, 1
+; CHECK-NEXT:    buffer_load_dword v1, off, s[96:99], 0
+; CHECK-NEXT:    s_not_b64 exec, exec
+; CHECK-NEXT:  .LBB2_3: ; %bb3
+; CHECK-NEXT:    v_readlane_b32 s0, v0, 0
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s1
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s2
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s3
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s4
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s5
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s6
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s9
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s10
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s11
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s12
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s13
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s14
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s15
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s16
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s17
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s18
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s19
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s20
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s21
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s22
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s23
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s24
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s25
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s26
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s27
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s28
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s29
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s30
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s31
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s32
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s33
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s34
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s35
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s36
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s37
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s38
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s39
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s40
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s41
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s42
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s43
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s44
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s45
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s46
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s47
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s48
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s49
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s50
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s51
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s52
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s53
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s54
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s55
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s56
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s57
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s58
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s59
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s60
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s61
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s62
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s63
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s64
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s65
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s66
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s67
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s68
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s69
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s70
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s71
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s72
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s73
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s74
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s75
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s76
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s77
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s78
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s79
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s80
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s81
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s82
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s83
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s84
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s85
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s86
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s87
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s88
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s89
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s90
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s91
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s92
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s93
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s94
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s95
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s96
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s97
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s98
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s99
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s100
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s101
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use vcc_lo
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use vcc_hi
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_endpgm
+entry:
+  %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0
+  %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0
+  %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0
+  %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0
+  %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0
+  %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0
+  %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0
+  %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0
+  %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0
+  %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0
+  %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0
+  %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0
+  %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0
+  %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0
+  %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0
+  %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0
+  %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0
+  %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0
+  %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0
+  %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0
+  %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0
+  %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0
+  %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0
+  %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0
+  %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0
+  %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0
+  %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0
+  %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0
+  %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0
+  %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0
+  %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0
+  %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0
+  %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0
+  %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0
+  %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0
+  %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0
+  %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0
+  %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0
+  %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0
+  %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0
+  %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0
+  %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0
+  %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0
+  %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0
+  %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0
+  %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0
+  %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0
+  %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0
+  %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0
+  %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0
+  %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0
+  %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0
+  %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0
+  %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0
+  %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0
+  %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0
+  %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0
+  %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0
+  %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0
+  %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0
+  %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0
+  %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0
+  %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0
+  %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0
+  %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0
+  %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0
+  %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0
+  %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0
+  %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0
+  %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0
+  %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0
+  %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0
+  %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0
+  %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0
+  %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0
+  %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0
+  %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0
+  %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0
+  %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0
+  %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0
+  %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0
+  %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0
+  %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0
+  %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0
+  %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0
+  %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0
+  %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0
+  %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0
+  %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0
+  %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0
+  %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0
+  %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0
+  %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0
+  %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0
+  %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0
+  %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0
+  %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0
+  %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0
+  %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0
+  %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0
+  %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0
+  %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0
+  %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_lo}"() #0
+  %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_hi}"() #0
+
+  %cmp = icmp eq i32 %cnd, 42
+  br i1 %cmp, label %bb3, label %bb4 ; +8 dword branch
+
+bb4:
+  %cmp4 = icmp eq i32 %cnd, 113
+  br i1 %cmp4, label %bb3, label %bb2 ; +8 dword branch
+
+bb2: ; 260 bytes: 256-byte inline asm block plus a 4-byte branch
+  ; 256 byte asm: 32 v_nop_e64 instructions, 8 bytes each
+  call void asm sideeffect
+   "v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64",""() #0
+  br label %bb3
+
+bb3:
+  tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0
+  tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0
+  tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0
+  tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0
+  tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0
+  tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0
+  tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0
+  tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0
+  tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0
+  tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0
+  tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0
+  tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0
+  tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0
+  tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0
+  tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0
+  tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0
+  tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0
+  tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0
+  tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0
+  tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0
+  tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0
+  tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0
+  tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0
+  tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0
+  tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0
+  tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0
+  tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0
+  tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0
+  tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0
+  tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0
+  tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0
+  tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0
+  tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0
+  tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0
+  tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0
+  tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0
+  tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0
+  tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0
+  tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0
+  tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0
+  tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0
+  tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0
+  tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0
+  tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0
+  tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0
+  tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0
+  tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0
+  tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0
+  tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0
+  tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0
+  tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0
+  tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0
+  tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0
+  tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0
+  tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0
+  tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0
+  tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0
+  tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0
+  tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0
+  tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0
+  tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0
+  tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0
+  tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0
+  tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0
+  tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0
+  tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0
+  tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0
+  tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0
+  tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0
+  tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0
+  tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0
+  tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0
+  tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0
+  tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0
+  tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0
+  tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0
+  tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0
+  tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0
+  tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0
+  tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0
+  tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0
+  tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0
+  tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0
+  tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0
+  tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0
+  tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0
+  tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0
+  tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0
+  tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0
+  tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0
+  tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0
+  tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0
+  tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0
+  tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0
+  tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0
+  tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0
+  tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0
+  tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0
+  tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0
+  tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0
+  tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0
+  tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0
+  tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0
+  tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0
+  ret void
+}
+
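+; The restore code for the two out-of-range branches to %bb3 is identical, but
+; the restore blocks resume at different points (.LBB3_6 and .LBB3_8 below),
+; so they remain duplicated rather than being merged.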
+define void @spill_func_duplicated_restore_block(i32 addrspace(1)* %arg) #0 {
+; CHECK-LABEL: spill_func_duplicated_restore_block:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
+; CHECK-NEXT:    s_waitcnt expcnt(1)
+; CHECK-NEXT:    v_writelane_b32 v0, s30, 0
+; CHECK-NEXT:    v_writelane_b32 v0, s31, 1
+; CHECK-NEXT:    v_writelane_b32 v0, s33, 2
+; CHECK-NEXT:    v_writelane_b32 v0, s34, 3
+; CHECK-NEXT:    v_writelane_b32 v0, s35, 4
+; CHECK-NEXT:    v_writelane_b32 v0, s36, 5
+; CHECK-NEXT:    v_writelane_b32 v0, s37, 6
+; CHECK-NEXT:    v_writelane_b32 v0, s38, 7
+; CHECK-NEXT:    v_writelane_b32 v0, s39, 8
+; CHECK-NEXT:    v_writelane_b32 v0, s40, 9
+; CHECK-NEXT:    v_writelane_b32 v0, s41, 10
+; CHECK-NEXT:    v_writelane_b32 v0, s42, 11
+; CHECK-NEXT:    v_writelane_b32 v0, s43, 12
+; CHECK-NEXT:    v_writelane_b32 v0, s44, 13
+; CHECK-NEXT:    v_writelane_b32 v0, s45, 14
+; CHECK-NEXT:    v_writelane_b32 v0, s46, 15
+; CHECK-NEXT:    v_writelane_b32 v0, s47, 16
+; CHECK-NEXT:    v_writelane_b32 v0, s48, 17
+; CHECK-NEXT:    v_writelane_b32 v0, s49, 18
+; CHECK-NEXT:    v_writelane_b32 v0, s50, 19
+; CHECK-NEXT:    v_writelane_b32 v0, s51, 20
+; CHECK-NEXT:    v_writelane_b32 v0, s52, 21
+; CHECK-NEXT:    v_writelane_b32 v0, s53, 22
+; CHECK-NEXT:    v_writelane_b32 v0, s54, 23
+; CHECK-NEXT:    v_writelane_b32 v0, s55, 24
+; CHECK-NEXT:    v_writelane_b32 v0, s56, 25
+; CHECK-NEXT:    v_writelane_b32 v0, s57, 26
+; CHECK-NEXT:    v_writelane_b32 v0, s58, 27
+; CHECK-NEXT:    v_writelane_b32 v0, s59, 28
+; CHECK-NEXT:    v_writelane_b32 v0, s60, 29
+; CHECK-NEXT:    v_writelane_b32 v0, s61, 30
+; CHECK-NEXT:    v_writelane_b32 v0, s62, 31
+; CHECK-NEXT:    v_writelane_b32 v0, s63, 32
+; CHECK-NEXT:    v_writelane_b32 v0, s64, 33
+; CHECK-NEXT:    v_writelane_b32 v0, s65, 34
+; CHECK-NEXT:    v_writelane_b32 v0, s66, 35
+; CHECK-NEXT:    v_writelane_b32 v0, s67, 36
+; CHECK-NEXT:    v_writelane_b32 v0, s68, 37
+; CHECK-NEXT:    v_writelane_b32 v0, s69, 38
+; CHECK-NEXT:    v_writelane_b32 v0, s70, 39
+; CHECK-NEXT:    v_writelane_b32 v0, s71, 40
+; CHECK-NEXT:    v_writelane_b32 v0, s72, 41
+; CHECK-NEXT:    v_writelane_b32 v0, s73, 42
+; CHECK-NEXT:    v_writelane_b32 v0, s74, 43
+; CHECK-NEXT:    v_writelane_b32 v0, s75, 44
+; CHECK-NEXT:    v_writelane_b32 v0, s76, 45
+; CHECK-NEXT:    v_writelane_b32 v0, s77, 46
+; CHECK-NEXT:    v_writelane_b32 v0, s78, 47
+; CHECK-NEXT:    v_writelane_b32 v0, s79, 48
+; CHECK-NEXT:    v_writelane_b32 v0, s80, 49
+; CHECK-NEXT:    v_writelane_b32 v0, s81, 50
+; CHECK-NEXT:    v_writelane_b32 v0, s82, 51
+; CHECK-NEXT:    v_writelane_b32 v0, s83, 52
+; CHECK-NEXT:    v_writelane_b32 v0, s84, 53
+; CHECK-NEXT:    v_writelane_b32 v0, s85, 54
+; CHECK-NEXT:    v_writelane_b32 v0, s86, 55
+; CHECK-NEXT:    v_writelane_b32 v0, s87, 56
+; CHECK-NEXT:    s_waitcnt expcnt(0)
+; CHECK-NEXT:    v_writelane_b32 v1, s95, 0
+; CHECK-NEXT:    v_writelane_b32 v0, s88, 57
+; CHECK-NEXT:    v_writelane_b32 v1, s96, 1
+; CHECK-NEXT:    v_writelane_b32 v0, s89, 58
+; CHECK-NEXT:    v_writelane_b32 v1, s97, 2
+; CHECK-NEXT:    v_writelane_b32 v0, s90, 59
+; CHECK-NEXT:    v_writelane_b32 v1, s98, 3
+; CHECK-NEXT:    v_writelane_b32 v0, s91, 60
+; CHECK-NEXT:    v_writelane_b32 v1, s99, 4
+; CHECK-NEXT:    v_writelane_b32 v0, s92, 61
+; CHECK-NEXT:    v_writelane_b32 v1, s100, 5
+; CHECK-NEXT:    v_writelane_b32 v0, s93, 62
+; CHECK-NEXT:    v_writelane_b32 v1, s101, 6
+; CHECK-NEXT:    s_mov_b32 s34, s12
+; CHECK-NEXT:    v_writelane_b32 v0, s94, 63
+; CHECK-NEXT:    v_writelane_b32 v1, s34, 7
+; CHECK-NEXT:    s_cmp_eq_u32 s34, 42
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s2, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s3, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s5, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s6, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s7, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s8, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s9, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s10, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s11, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s12, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s13, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s14, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s15, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s16, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s17, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s18, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s19, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s20, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s21, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s22, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s23, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s24, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s25, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s26, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s27, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s28, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s29, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s30, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s31, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s32, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s33, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s34, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s35, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s36, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s37, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s38, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s39, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s40, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s41, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s42, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s43, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s44, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s45, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s46, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s47, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s48, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s49, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s50, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s51, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s52, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s53, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s54, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s55, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s56, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s57, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s58, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s59, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s60, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s61, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s62, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s63, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s64, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s65, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s66, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s67, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s68, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s69, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s70, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s71, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s72, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s73, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s74, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s75, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s76, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s77, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s78, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s79, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s80, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s81, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s82, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s83, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s84, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s85, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s86, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s87, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s88, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s89, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s90, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s91, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s92, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s93, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s94, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s95, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s96, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s97, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s98, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s99, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s100, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s101, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 vcc_lo, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 vcc_hi, 0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_cbranch_scc0 .LBB3_1
+; CHECK-NEXT:  .LBB3_5: ; %entry
+; CHECK-NEXT:    s_not_b64 exec, exec
+; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
+; CHECK-NEXT:    v_writelane_b32 v2, s0, 0
+; CHECK-NEXT:    v_writelane_b32 v2, s1, 1
+; CHECK-NEXT:    s_getpc_b64 s[0:1]
+; CHECK-NEXT:  .Lpost_getpc4:
+; CHECK-NEXT:    s_add_u32 s0, s0, (.LBB3_6-.Lpost_getpc4)&4294967295
+; CHECK-NEXT:    s_addc_u32 s1, s1, (.LBB3_6-.Lpost_getpc4)>>32
+; CHECK-NEXT:    s_setpc_b64 s[0:1]
+; CHECK-NEXT:  .LBB3_1: ; %bb4
+; CHECK-NEXT:    v_writelane_b32 v1, vcc_hi, 8
+; CHECK-NEXT:    s_mov_b32 vcc_hi, vcc_lo
+; CHECK-NEXT:    s_mov_b32 vcc_lo, s101
+; CHECK-NEXT:    s_mov_b32 s101, s100
+; CHECK-NEXT:    s_mov_b32 s100, s99
+; CHECK-NEXT:    s_mov_b32 s99, s98
+; CHECK-NEXT:    s_mov_b32 s98, s97
+; CHECK-NEXT:    s_mov_b32 s97, s96
+; CHECK-NEXT:    s_mov_b32 s96, s95
+; CHECK-NEXT:    s_mov_b32 s95, s94
+; CHECK-NEXT:    s_mov_b32 s94, s93
+; CHECK-NEXT:    s_mov_b32 s93, s92
+; CHECK-NEXT:    s_mov_b32 s92, s91
+; CHECK-NEXT:    s_mov_b32 s91, s90
+; CHECK-NEXT:    s_mov_b32 s90, s89
+; CHECK-NEXT:    s_mov_b32 s89, s88
+; CHECK-NEXT:    s_mov_b32 s88, s87
+; CHECK-NEXT:    s_mov_b32 s87, s86
+; CHECK-NEXT:    s_mov_b32 s86, s85
+; CHECK-NEXT:    s_mov_b32 s85, s84
+; CHECK-NEXT:    s_mov_b32 s84, s83
+; CHECK-NEXT:    s_mov_b32 s83, s82
+; CHECK-NEXT:    s_mov_b32 s82, s81
+; CHECK-NEXT:    s_mov_b32 s81, s80
+; CHECK-NEXT:    s_mov_b32 s80, s79
+; CHECK-NEXT:    s_mov_b32 s79, s78
+; CHECK-NEXT:    s_mov_b32 s78, s77
+; CHECK-NEXT:    s_mov_b32 s77, s76
+; CHECK-NEXT:    s_mov_b32 s76, s75
+; CHECK-NEXT:    s_mov_b32 s75, s74
+; CHECK-NEXT:    s_mov_b32 s74, s73
+; CHECK-NEXT:    s_mov_b32 s73, s72
+; CHECK-NEXT:    s_mov_b32 s72, s71
+; CHECK-NEXT:    s_mov_b32 s71, s70
+; CHECK-NEXT:    s_mov_b32 s70, s69
+; CHECK-NEXT:    s_mov_b32 s69, s68
+; CHECK-NEXT:    s_mov_b32 s68, s67
+; CHECK-NEXT:    s_mov_b32 s67, s66
+; CHECK-NEXT:    s_mov_b32 s66, s65
+; CHECK-NEXT:    s_mov_b32 s65, s64
+; CHECK-NEXT:    s_mov_b32 s64, s63
+; CHECK-NEXT:    s_mov_b32 s63, s62
+; CHECK-NEXT:    s_mov_b32 s62, s61
+; CHECK-NEXT:    s_mov_b32 s61, s60
+; CHECK-NEXT:    s_mov_b32 s60, s59
+; CHECK-NEXT:    s_mov_b32 s59, s58
+; CHECK-NEXT:    s_mov_b32 s58, s57
+; CHECK-NEXT:    s_mov_b32 s57, s56
+; CHECK-NEXT:    s_mov_b32 s56, s55
+; CHECK-NEXT:    s_mov_b32 s55, s54
+; CHECK-NEXT:    s_mov_b32 s54, s53
+; CHECK-NEXT:    s_mov_b32 s53, s52
+; CHECK-NEXT:    s_mov_b32 s52, s51
+; CHECK-NEXT:    s_mov_b32 s51, s50
+; CHECK-NEXT:    s_mov_b32 s50, s49
+; CHECK-NEXT:    s_mov_b32 s49, s48
+; CHECK-NEXT:    s_mov_b32 s48, s47
+; CHECK-NEXT:    s_mov_b32 s47, s46
+; CHECK-NEXT:    s_mov_b32 s46, s45
+; CHECK-NEXT:    s_mov_b32 s45, s44
+; CHECK-NEXT:    s_mov_b32 s44, s43
+; CHECK-NEXT:    s_mov_b32 s43, s42
+; CHECK-NEXT:    s_mov_b32 s42, s41
+; CHECK-NEXT:    s_mov_b32 s41, s40
+; CHECK-NEXT:    s_mov_b32 s40, s39
+; CHECK-NEXT:    s_mov_b32 s39, s38
+; CHECK-NEXT:    s_mov_b32 s38, s37
+; CHECK-NEXT:    s_mov_b32 s37, s36
+; CHECK-NEXT:    s_mov_b32 s36, s35
+; CHECK-NEXT:    s_mov_b32 s35, s34
+; CHECK-NEXT:    s_mov_b32 s34, s31
+; CHECK-NEXT:    s_mov_b32 s31, s30
+; CHECK-NEXT:    s_mov_b32 s30, s29
+; CHECK-NEXT:    s_mov_b32 s29, s28
+; CHECK-NEXT:    s_mov_b32 s28, s27
+; CHECK-NEXT:    s_mov_b32 s27, s26
+; CHECK-NEXT:    s_mov_b32 s26, s25
+; CHECK-NEXT:    s_mov_b32 s25, s24
+; CHECK-NEXT:    s_mov_b32 s24, s23
+; CHECK-NEXT:    s_mov_b32 s23, s22
+; CHECK-NEXT:    s_mov_b32 s22, s21
+; CHECK-NEXT:    s_mov_b32 s21, s20
+; CHECK-NEXT:    s_mov_b32 s20, s19
+; CHECK-NEXT:    s_mov_b32 s19, s18
+; CHECK-NEXT:    s_mov_b32 s18, s17
+; CHECK-NEXT:    s_mov_b32 s17, s16
+; CHECK-NEXT:    s_mov_b32 s16, s15
+; CHECK-NEXT:    s_mov_b32 s15, s14
+; CHECK-NEXT:    s_mov_b32 s14, s13
+; CHECK-NEXT:    s_mov_b32 s13, s12
+; CHECK-NEXT:    s_mov_b32 s12, s11
+; CHECK-NEXT:    s_mov_b32 s11, s10
+; CHECK-NEXT:    s_mov_b32 s10, s9
+; CHECK-NEXT:    s_mov_b32 s9, s8
+; CHECK-NEXT:    s_mov_b32 s8, s7
+; CHECK-NEXT:    s_mov_b32 s7, s6
+; CHECK-NEXT:    s_mov_b32 s6, s5
+; CHECK-NEXT:    s_mov_b32 s5, s4
+; CHECK-NEXT:    v_readlane_b32 s4, v1, 7
+; CHECK-NEXT:    s_cmpk_eq_i32 s4, 0x71
+; CHECK-NEXT:    s_cbranch_scc0 .LBB3_2
+; CHECK-NEXT:  .LBB3_7: ; %bb4
+; CHECK-NEXT:    s_not_b64 exec, exec
+; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
+; CHECK-NEXT:    v_writelane_b32 v2, s0, 0
+; CHECK-NEXT:    v_writelane_b32 v2, s1, 1
+; CHECK-NEXT:    s_getpc_b64 s[0:1]
+; CHECK-NEXT:  .Lpost_getpc5:
+; CHECK-NEXT:    s_add_u32 s0, s0, (.LBB3_8-.Lpost_getpc5)&4294967295
+; CHECK-NEXT:    s_addc_u32 s1, s1, (.LBB3_8-.Lpost_getpc5)>>32
+; CHECK-NEXT:    s_setpc_b64 s[0:1]
+; CHECK-NEXT:  .LBB3_2: ; %bb2
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    v_nop_e64
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_branch .LBB3_3
+; CHECK-NEXT:  .LBB3_8: ; %bb3
+; CHECK-NEXT:    v_readlane_b32 s0, v2, 0
+; CHECK-NEXT:    v_readlane_b32 s1, v2, 1
+; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:8
+; CHECK-NEXT:    s_not_b64 exec, exec
+; CHECK-NEXT:  .LBB3_3: ; %Flow
+; CHECK-NEXT:    s_mov_b32 s4, s5
+; CHECK-NEXT:    s_mov_b32 s5, s6
+; CHECK-NEXT:    s_mov_b32 s6, s7
+; CHECK-NEXT:    s_mov_b32 s7, s8
+; CHECK-NEXT:    s_mov_b32 s8, s9
+; CHECK-NEXT:    s_mov_b32 s9, s10
+; CHECK-NEXT:    s_mov_b32 s10, s11
+; CHECK-NEXT:    s_mov_b32 s11, s12
+; CHECK-NEXT:    s_mov_b32 s12, s13
+; CHECK-NEXT:    s_mov_b32 s13, s14
+; CHECK-NEXT:    s_mov_b32 s14, s15
+; CHECK-NEXT:    s_mov_b32 s15, s16
+; CHECK-NEXT:    s_mov_b32 s16, s17
+; CHECK-NEXT:    s_mov_b32 s17, s18
+; CHECK-NEXT:    s_mov_b32 s18, s19
+; CHECK-NEXT:    s_mov_b32 s19, s20
+; CHECK-NEXT:    s_mov_b32 s20, s21
+; CHECK-NEXT:    s_mov_b32 s21, s22
+; CHECK-NEXT:    s_mov_b32 s22, s23
+; CHECK-NEXT:    s_mov_b32 s23, s24
+; CHECK-NEXT:    s_mov_b32 s24, s25
+; CHECK-NEXT:    s_mov_b32 s25, s26
+; CHECK-NEXT:    s_mov_b32 s26, s27
+; CHECK-NEXT:    s_mov_b32 s27, s28
+; CHECK-NEXT:    s_mov_b32 s28, s29
+; CHECK-NEXT:    s_mov_b32 s29, s30
+; CHECK-NEXT:    s_mov_b32 s30, s31
+; CHECK-NEXT:    s_mov_b32 s31, s34
+; CHECK-NEXT:    s_mov_b32 s34, s35
+; CHECK-NEXT:    s_mov_b32 s35, s36
+; CHECK-NEXT:    s_mov_b32 s36, s37
+; CHECK-NEXT:    s_mov_b32 s37, s38
+; CHECK-NEXT:    s_mov_b32 s38, s39
+; CHECK-NEXT:    s_mov_b32 s39, s40
+; CHECK-NEXT:    s_mov_b32 s40, s41
+; CHECK-NEXT:    s_mov_b32 s41, s42
+; CHECK-NEXT:    s_mov_b32 s42, s43
+; CHECK-NEXT:    s_mov_b32 s43, s44
+; CHECK-NEXT:    s_mov_b32 s44, s45
+; CHECK-NEXT:    s_mov_b32 s45, s46
+; CHECK-NEXT:    s_mov_b32 s46, s47
+; CHECK-NEXT:    s_mov_b32 s47, s48
+; CHECK-NEXT:    s_mov_b32 s48, s49
+; CHECK-NEXT:    s_mov_b32 s49, s50
+; CHECK-NEXT:    s_mov_b32 s50, s51
+; CHECK-NEXT:    s_mov_b32 s51, s52
+; CHECK-NEXT:    s_mov_b32 s52, s53
+; CHECK-NEXT:    s_mov_b32 s53, s54
+; CHECK-NEXT:    s_mov_b32 s54, s55
+; CHECK-NEXT:    s_mov_b32 s55, s56
+; CHECK-NEXT:    s_mov_b32 s56, s57
+; CHECK-NEXT:    s_mov_b32 s57, s58
+; CHECK-NEXT:    s_mov_b32 s58, s59
+; CHECK-NEXT:    s_mov_b32 s59, s60
+; CHECK-NEXT:    s_mov_b32 s60, s61
+; CHECK-NEXT:    s_mov_b32 s61, s62
+; CHECK-NEXT:    s_mov_b32 s62, s63
+; CHECK-NEXT:    s_mov_b32 s63, s64
+; CHECK-NEXT:    s_mov_b32 s64, s65
+; CHECK-NEXT:    s_mov_b32 s65, s66
+; CHECK-NEXT:    s_mov_b32 s66, s67
+; CHECK-NEXT:    s_mov_b32 s67, s68
+; CHECK-NEXT:    s_mov_b32 s68, s69
+; CHECK-NEXT:    s_mov_b32 s69, s70
+; CHECK-NEXT:    s_mov_b32 s70, s71
+; CHECK-NEXT:    s_mov_b32 s71, s72
+; CHECK-NEXT:    s_mov_b32 s72, s73
+; CHECK-NEXT:    s_mov_b32 s73, s74
+; CHECK-NEXT:    s_mov_b32 s74, s75
+; CHECK-NEXT:    s_mov_b32 s75, s76
+; CHECK-NEXT:    s_mov_b32 s76, s77
+; CHECK-NEXT:    s_mov_b32 s77, s78
+; CHECK-NEXT:    s_mov_b32 s78, s79
+; CHECK-NEXT:    s_mov_b32 s79, s80
+; CHECK-NEXT:    s_mov_b32 s80, s81
+; CHECK-NEXT:    s_mov_b32 s81, s82
+; CHECK-NEXT:    s_mov_b32 s82, s83
+; CHECK-NEXT:    s_mov_b32 s83, s84
+; CHECK-NEXT:    s_mov_b32 s84, s85
+; CHECK-NEXT:    s_mov_b32 s85, s86
+; CHECK-NEXT:    s_mov_b32 s86, s87
+; CHECK-NEXT:    s_mov_b32 s87, s88
+; CHECK-NEXT:    s_mov_b32 s88, s89
+; CHECK-NEXT:    s_mov_b32 s89, s90
+; CHECK-NEXT:    s_mov_b32 s90, s91
+; CHECK-NEXT:    s_mov_b32 s91, s92
+; CHECK-NEXT:    s_mov_b32 s92, s93
+; CHECK-NEXT:    s_mov_b32 s93, s94
+; CHECK-NEXT:    s_mov_b32 s94, s95
+; CHECK-NEXT:    s_mov_b32 s95, s96
+; CHECK-NEXT:    s_mov_b32 s96, s97
+; CHECK-NEXT:    s_mov_b32 s97, s98
+; CHECK-NEXT:    s_mov_b32 s98, s99
+; CHECK-NEXT:    s_mov_b32 s99, s100
+; CHECK-NEXT:    s_mov_b32 s100, s101
+; CHECK-NEXT:    s_mov_b32 s101, vcc_lo
+; CHECK-NEXT:    s_mov_b32 vcc_lo, vcc_hi
+; CHECK-NEXT:    v_readlane_b32 vcc_hi, v1, 8
+; CHECK-NEXT:    s_branch .LBB3_4
+; CHECK-NEXT:  .LBB3_6: ; %bb3
+; CHECK-NEXT:    v_readlane_b32 s0, v2, 0
+; CHECK-NEXT:    v_readlane_b32 s1, v2, 1
+; CHECK-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:8
+; CHECK-NEXT:    s_not_b64 exec, exec
+; CHECK-NEXT:  .LBB3_4: ; %bb3
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s1
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s2
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s3
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s4
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s5
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s6
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s9
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s10
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s11
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s12
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s13
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s14
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s15
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s16
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s17
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s18
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s19
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s20
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s21
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s22
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s23
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s24
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s25
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s26
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s27
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s28
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s29
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s30
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s31
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s32
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s33
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s34
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s35
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s36
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s37
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s38
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s39
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s40
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s41
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s42
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s43
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s44
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s45
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s46
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s47
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s48
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s49
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s50
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s51
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s52
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s53
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s54
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s55
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s56
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s57
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s58
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s59
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s60
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s61
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s62
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s63
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s64
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s65
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s66
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s67
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s68
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s69
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s70
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s71
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s72
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s73
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s74
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s75
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s76
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s77
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s78
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s79
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s80
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s81
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s82
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s83
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s84
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s85
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s86
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s87
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s88
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s89
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s90
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s91
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s92
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s93
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s94
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s95
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s96
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s97
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s98
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s99
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s100
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use s101
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use vcc_lo
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; reg use vcc_hi
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_readlane_b32 s101, v1, 6
+; CHECK-NEXT:    v_readlane_b32 s100, v1, 5
+; CHECK-NEXT:    v_readlane_b32 s99, v1, 4
+; CHECK-NEXT:    v_readlane_b32 s98, v1, 3
+; CHECK-NEXT:    v_readlane_b32 s97, v1, 2
+; CHECK-NEXT:    v_readlane_b32 s96, v1, 1
+; CHECK-NEXT:    v_readlane_b32 s95, v1, 0
+; CHECK-NEXT:    v_readlane_b32 s94, v0, 63
+; CHECK-NEXT:    v_readlane_b32 s93, v0, 62
+; CHECK-NEXT:    v_readlane_b32 s92, v0, 61
+; CHECK-NEXT:    v_readlane_b32 s91, v0, 60
+; CHECK-NEXT:    v_readlane_b32 s90, v0, 59
+; CHECK-NEXT:    v_readlane_b32 s89, v0, 58
+; CHECK-NEXT:    v_readlane_b32 s88, v0, 57
+; CHECK-NEXT:    v_readlane_b32 s87, v0, 56
+; CHECK-NEXT:    v_readlane_b32 s86, v0, 55
+; CHECK-NEXT:    v_readlane_b32 s85, v0, 54
+; CHECK-NEXT:    v_readlane_b32 s84, v0, 53
+; CHECK-NEXT:    v_readlane_b32 s83, v0, 52
+; CHECK-NEXT:    v_readlane_b32 s82, v0, 51
+; CHECK-NEXT:    v_readlane_b32 s81, v0, 50
+; CHECK-NEXT:    v_readlane_b32 s80, v0, 49
+; CHECK-NEXT:    v_readlane_b32 s79, v0, 48
+; CHECK-NEXT:    v_readlane_b32 s78, v0, 47
+; CHECK-NEXT:    v_readlane_b32 s77, v0, 46
+; CHECK-NEXT:    v_readlane_b32 s76, v0, 45
+; CHECK-NEXT:    v_readlane_b32 s75, v0, 44
+; CHECK-NEXT:    v_readlane_b32 s74, v0, 43
+; CHECK-NEXT:    v_readlane_b32 s73, v0, 42
+; CHECK-NEXT:    v_readlane_b32 s72, v0, 41
+; CHECK-NEXT:    v_readlane_b32 s71, v0, 40
+; CHECK-NEXT:    v_readlane_b32 s70, v0, 39
+; CHECK-NEXT:    v_readlane_b32 s69, v0, 38
+; CHECK-NEXT:    v_readlane_b32 s68, v0, 37
+; CHECK-NEXT:    v_readlane_b32 s67, v0, 36
+; CHECK-NEXT:    v_readlane_b32 s66, v0, 35
+; CHECK-NEXT:    v_readlane_b32 s65, v0, 34
+; CHECK-NEXT:    v_readlane_b32 s64, v0, 33
+; CHECK-NEXT:    v_readlane_b32 s63, v0, 32
+; CHECK-NEXT:    v_readlane_b32 s62, v0, 31
+; CHECK-NEXT:    v_readlane_b32 s61, v0, 30
+; CHECK-NEXT:    v_readlane_b32 s60, v0, 29
+; CHECK-NEXT:    v_readlane_b32 s59, v0, 28
+; CHECK-NEXT:    v_readlane_b32 s58, v0, 27
+; CHECK-NEXT:    v_readlane_b32 s57, v0, 26
+; CHECK-NEXT:    v_readlane_b32 s56, v0, 25
+; CHECK-NEXT:    v_readlane_b32 s55, v0, 24
+; CHECK-NEXT:    v_readlane_b32 s54, v0, 23
+; CHECK-NEXT:    v_readlane_b32 s53, v0, 22
+; CHECK-NEXT:    v_readlane_b32 s52, v0, 21
+; CHECK-NEXT:    v_readlane_b32 s51, v0, 20
+; CHECK-NEXT:    v_readlane_b32 s50, v0, 19
+; CHECK-NEXT:    v_readlane_b32 s49, v0, 18
+; CHECK-NEXT:    v_readlane_b32 s48, v0, 17
+; CHECK-NEXT:    v_readlane_b32 s47, v0, 16
+; CHECK-NEXT:    v_readlane_b32 s46, v0, 15
+; CHECK-NEXT:    v_readlane_b32 s45, v0, 14
+; CHECK-NEXT:    v_readlane_b32 s44, v0, 13
+; CHECK-NEXT:    v_readlane_b32 s43, v0, 12
+; CHECK-NEXT:    v_readlane_b32 s42, v0, 11
+; CHECK-NEXT:    v_readlane_b32 s41, v0, 10
+; CHECK-NEXT:    v_readlane_b32 s40, v0, 9
+; CHECK-NEXT:    v_readlane_b32 s39, v0, 8
+; CHECK-NEXT:    v_readlane_b32 s38, v0, 7
+; CHECK-NEXT:    v_readlane_b32 s37, v0, 6
+; CHECK-NEXT:    v_readlane_b32 s36, v0, 5
+; CHECK-NEXT:    v_readlane_b32 s35, v0, 4
+; CHECK-NEXT:    v_readlane_b32 s34, v0, 3
+; CHECK-NEXT:    v_readlane_b32 s33, v0, 2
+; CHECK-NEXT:    v_readlane_b32 s31, v0, 1
+; CHECK-NEXT:    v_readlane_b32 s30, v0, 0
+; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; CHECK-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT:    s_mov_b64 exec, s[4:5]
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %cnd = tail call i32 @llvm.amdgcn.workgroup.id.x() #0
+  %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0
+  %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0
+  %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0
+  %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0
+  %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0
+  %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0
+  %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0
+  %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0
+  %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0
+  %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0
+  %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0
+  %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0
+  %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0
+  %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0
+  %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0
+  %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0
+  %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0
+  %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0
+  %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0
+  %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0
+  %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0
+  %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0
+  %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0
+  %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0
+  %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0
+  %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0
+  %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0
+  %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0
+  %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0
+  %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0
+  %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0
+  %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0
+  %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0
+  %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0
+  %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0
+  %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0
+  %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0
+  %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0
+  %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0
+  %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0
+  %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0
+  %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0
+  %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0
+  %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0
+  %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0
+  %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0
+  %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0
+  %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0
+  %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0
+  %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0
+  %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0
+  %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0
+  %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0
+  %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0
+  %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0
+  %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0
+  %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0
+  %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0
+  %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0
+  %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0
+  %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0
+  %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0
+  %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0
+  %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0
+  %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0
+  %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0
+  %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0
+  %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0
+  %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0
+  %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0
+  %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0
+  %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0
+  %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0
+  %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0
+  %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0
+  %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0
+  %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0
+  %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0
+  %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0
+  %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0
+  %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0
+  %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0
+  %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0
+  %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0
+  %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0
+  %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0
+  %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0
+  %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0
+  %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0
+  %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0
+  %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0
+  %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0
+  %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0
+  %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0
+  %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0
+  %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0
+  %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0
+  %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0
+  %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0
+  %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0
+  %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0
+  %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0
+  %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_lo}"() #0
+  %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_hi}"() #0
+
+  %cmp = icmp eq i32 %cnd, 42
+  br i1 %cmp, label %bb3, label %bb4 ; +8 dword branch
+
+bb4:
+  %cmp4 = icmp eq i32 %cnd, 113
+  br i1 %cmp4, label %bb3, label %bb2 ; +8 dword branch
+
+bb2: ; 260 bytes: 256-byte inline asm block plus a 4-byte branch
+  ; 256 byte asm: 32 v_nop_e64 instructions, 8 bytes each
+  call void asm sideeffect
+   "v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
+    v_nop_e64
     v_nop_e64
     v_nop_e64
     v_nop_e64
diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
--- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll
+++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
@@ -1295,3 +1295,340 @@
 
   ret void
 }
+
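+; Check that the identical restore blocks inserted for the two out-of-range
+; branches to %dest_1 are deduplicated into a single restore block.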
+define void @relax_jal_spill_32_deduplicate_restore_block() {
+; CHECK-RV32-LABEL: relax_jal_spill_32_deduplicate_restore_block:
+; CHECK-RV32:       # %bb.0: # %entry
+; CHECK-RV32-NEXT:    addi sp, sp, -64
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-RV32-NEXT:    .cfi_offset s5, -28
+; CHECK-RV32-NEXT:    .cfi_offset s6, -32
+; CHECK-RV32-NEXT:    .cfi_offset s7, -36
+; CHECK-RV32-NEXT:    .cfi_offset s8, -40
+; CHECK-RV32-NEXT:    .cfi_offset s9, -44
+; CHECK-RV32-NEXT:    .cfi_offset s10, -48
+; CHECK-RV32-NEXT:    .cfi_offset s11, -52
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li ra, 1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t0, 5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t1, 6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t2, 7
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s0, 8
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s1, 9
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a0, 10
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a1, 11
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a2, 12
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a3, 13
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a4, 14
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a5, 15
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a6, 16
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a7, 17
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s2, 18
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s3, 19
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s4, 20
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s5, 21
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s6, 22
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s7, 23
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s8, 24
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s9, 25
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s10, 26
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s11, 27
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t3, 28
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t4, 29
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t5, 30
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t6, 31
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    bne s10, s11, .LBB6_1
+; CHECK-RV32-NEXT:  # %bb.6: # %entry
+; CHECK-RV32-NEXT:    sw s11, 0(sp)
+; CHECK-RV32-NEXT:    jump .LBB6_7, s11
+; CHECK-RV32-NEXT:  .LBB6_1: # %cond_2
+; CHECK-RV32-NEXT:    bne t3, t4, .LBB6_2
+; CHECK-RV32-NEXT:  # %bb.8: # %cond_2
+; CHECK-RV32-NEXT:    sw s11, 0(sp)
+; CHECK-RV32-NEXT:    jump .LBB6_7, s11
+; CHECK-RV32-NEXT:  .LBB6_2: # %cond_3
+; CHECK-RV32-NEXT:    bne t5, t6, .LBB6_3
+; CHECK-RV32-NEXT:  # %bb.10: # %cond_3
+; CHECK-RV32-NEXT:    sw s11, 0(sp)
+; CHECK-RV32-NEXT:    jump .LBB6_11, s11
+; CHECK-RV32-NEXT:  .LBB6_3: # %space
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    .zero 1048576
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    j .LBB6_4
+; CHECK-RV32-NEXT:  .LBB6_7: # %dest_2
+; CHECK-RV32-NEXT:    lw s11, 0(sp)
+; CHECK-RV32-NEXT:  .LBB6_4: # %dest_1
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # dest 1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    j .LBB6_5
+; CHECK-RV32-NEXT:  .LBB6_11: # %dest_2
+; CHECK-RV32-NEXT:    lw s11, 0(sp)
+; CHECK-RV32-NEXT:  .LBB6_5: # %dest_2
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # dest 2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use ra
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t0
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s0
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a0
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a3
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a4
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a7
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s3
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s4
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s7
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s8
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s9
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s10
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s11
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t3
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t4
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    addi sp, sp, 64
+; CHECK-RV32-NEXT:    ret
+entry:
+  %ra = call i32 asm sideeffect "addi ra, x0, 1", "={ra}"()
+  %t0 = call i32 asm sideeffect "addi t0, x0, 5", "={t0}"()
+  %t1 = call i32 asm sideeffect "addi t1, x0, 6", "={t1}"()
+  %t2 = call i32 asm sideeffect "addi t2, x0, 7", "={t2}"()
+  %s0 = call i32 asm sideeffect "addi s0, x0, 8", "={s0}"()
+  %s1 = call i32 asm sideeffect "addi s1, x0, 9", "={s1}"()
+  %a0 = call i32 asm sideeffect "addi a0, x0, 10", "={a0}"()
+  %a1 = call i32 asm sideeffect "addi a1, x0, 11", "={a1}"()
+  %a2 = call i32 asm sideeffect "addi a2, x0, 12", "={a2}"()
+  %a3 = call i32 asm sideeffect "addi a3, x0, 13", "={a3}"()
+  %a4 = call i32 asm sideeffect "addi a4, x0, 14", "={a4}"()
+  %a5 = call i32 asm sideeffect "addi a5, x0, 15", "={a5}"()
+  %a6 = call i32 asm sideeffect "addi a6, x0, 16", "={a6}"()
+  %a7 = call i32 asm sideeffect "addi a7, x0, 17", "={a7}"()
+  %s2 = call i32 asm sideeffect "addi s2, x0, 18", "={s2}"()
+  %s3 = call i32 asm sideeffect "addi s3, x0, 19", "={s3}"()
+  %s4 = call i32 asm sideeffect "addi s4, x0, 20", "={s4}"()
+  %s5 = call i32 asm sideeffect "addi s5, x0, 21", "={s5}"()
+  %s6 = call i32 asm sideeffect "addi s6, x0, 22", "={s6}"()
+  %s7 = call i32 asm sideeffect "addi s7, x0, 23", "={s7}"()
+  %s8 = call i32 asm sideeffect "addi s8, x0, 24", "={s8}"()
+  %s9 = call i32 asm sideeffect "addi s9, x0, 25", "={s9}"()
+  %s10 = call i32 asm sideeffect "addi s10, x0, 26", "={s10}"()
+  %s11 = call i32 asm sideeffect "addi s11, x0, 27", "={s11}"()
+  %t3 = call i32 asm sideeffect "addi t3, x0, 28", "={t3}"()
+  %t4 = call i32 asm sideeffect "addi t4, x0, 29", "={t4}"()
+  %t5 = call i32 asm sideeffect "addi t5, x0, 30", "={t5}"()
+  %t6 = call i32 asm sideeffect "addi t6, x0, 31", "={t6}"()
+
+  br label %cond_1
+
+cond_1:
+  %cmp1 = icmp eq i32 %s10, %s11
+  br i1 %cmp1, label %dest_1, label %cond_2
+
+cond_2:
+  %cmp2 = icmp eq i32 %t3, %t4
+  br i1 %cmp2, label %dest_1, label %cond_3
+
+cond_3:
+  %cmp3 = icmp eq i32 %t5, %t6
+  br i1 %cmp3, label %dest_2, label %space
+
+space:
+  call void asm sideeffect ".space 1048576", ""()
+  br label %dest_1
+
+dest_1:
+  call void asm sideeffect "# dest 1", ""()
+  br label %dest_2
+
+dest_2:
+  call void asm sideeffect "# dest 2", ""()
+  br label %tail
+
+tail:
+  call void asm sideeffect "# reg use $0", "{ra}"(i32 %ra)
+  call void asm sideeffect "# reg use $0", "{t0}"(i32 %t0)
+  call void asm sideeffect "# reg use $0", "{t1}"(i32 %t1)
+  call void asm sideeffect "# reg use $0", "{t2}"(i32 %t2)
+  call void asm sideeffect "# reg use $0", "{s0}"(i32 %s0)
+  call void asm sideeffect "# reg use $0", "{s1}"(i32 %s1)
+  call void asm sideeffect "# reg use $0", "{a0}"(i32 %a0)
+  call void asm sideeffect "# reg use $0", "{a1}"(i32 %a1)
+  call void asm sideeffect "# reg use $0", "{a2}"(i32 %a2)
+  call void asm sideeffect "# reg use $0", "{a3}"(i32 %a3)
+  call void asm sideeffect "# reg use $0", "{a4}"(i32 %a4)
+  call void asm sideeffect "# reg use $0", "{a5}"(i32 %a5)
+  call void asm sideeffect "# reg use $0", "{a6}"(i32 %a6)
+  call void asm sideeffect "# reg use $0", "{a7}"(i32 %a7)
+  call void asm sideeffect "# reg use $0", "{s2}"(i32 %s2)
+  call void asm sideeffect "# reg use $0", "{s3}"(i32 %s3)
+  call void asm sideeffect "# reg use $0", "{s4}"(i32 %s4)
+  call void asm sideeffect "# reg use $0", "{s5}"(i32 %s5)
+  call void asm sideeffect "# reg use $0", "{s6}"(i32 %s6)
+  call void asm sideeffect "# reg use $0", "{s7}"(i32 %s7)
+  call void asm sideeffect "# reg use $0", "{s8}"(i32 %s8)
+  call void asm sideeffect "# reg use $0", "{s9}"(i32 %s9)
+  call void asm sideeffect "# reg use $0", "{s10}"(i32 %s10)
+  call void asm sideeffect "# reg use $0", "{s11}"(i32 %s11)
+  call void asm sideeffect "# reg use $0", "{t3}"(i32 %t3)
+  call void asm sideeffect "# reg use $0", "{t4}"(i32 %t4)
+  call void asm sideeffect "# reg use $0", "{t5}"(i32 %t5)
+  call void asm sideeffect "# reg use $0", "{t6}"(i32 %t6)
+
+  ret void
+}