diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineCombinerPattern.h" @@ -596,11 +597,12 @@ /// NewDestBB. Optionally, insert the clobbered register restoring in \p /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to /// the offset of the position to insert the new branch. - virtual void insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - MachineBasicBlock &RestoreBB, - const DebugLoc &DL, int64_t BrOffset = 0, - RegScavenger *RS = nullptr) const { + virtual void + insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, + MachineBasicBlock *RestoreBB, const DebugLoc &DL, + function_ref<MachineBasicBlock *(MachineBasicBlock *)> + DeduplicateRestoreBB, + int64_t BrOffset = 0, RegScavenger *RS = nullptr) const { llvm_unreachable("target did not implement"); } diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -8,6 +8,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -44,8 +45,7 @@ namespace { class BranchRelaxation : public MachineFunctionPass { - /// BasicBlockInfo - Information about the offset and size of a single - /// basic block. + /// BasicBlockInfo - Information about a single basic block. struct BasicBlockInfo { /// Offset - Distance from the beginning of the function to the beginning /// of this basic block.
@@ -60,6 +60,13 @@ /// beginning of the block, or from an aligned jump table at the end. unsigned Size = 0; + /// RestoreBlocks - Restore blocks for this basic block when this block + /// is a branch destination. + /// + /// This is used to deduplicate restore blocks. See deduplicateRestoreBlock + /// and fixupUnconditionalBranch for details. + TinyPtrVector<MachineBasicBlock *> RestoreBlocks; + BasicBlockInfo() = default; /// Compute the offset immediately following this block. \p MBB is the next @@ -75,6 +82,48 @@ // can't tell whether or not it will insert nops. Assume that it will. return alignTo(PO, Alignment) + Alignment.value() - ParentAlign.value(); } + + /// Returns true if \p OldRestoreBB and \p NewRestoreBB are identical + /// restore blocks. + static bool isRestoreBlockIdentical(const MachineBasicBlock *OldRestoreBB, + const MachineBasicBlock *NewRestoreBB) { + MachineBasicBlock::const_iterator ItOld = OldRestoreBB->begin(), + ItEndOld = OldRestoreBB->end(), + ItNew = NewRestoreBB->begin(), + ItEndNew = NewRestoreBB->end(); + while (true) { + // Since the target shouldn't place branches in the restore block, the only + // possible branch is the one placed by fixupUnconditionalBranch when there are + // multiple different restore blocks. All but the last restore block + // have an unconditional branch to DestBB when they act as PrevBB. + // This branch doesn't take part in the check of whether two + // restore blocks are identical. + if (ItOld != ItEndOld && ItOld->isBranch()) + ItEndOld = ItOld; + if (ItOld == ItEndOld || ItNew == ItEndNew) + return ItOld == ItEndOld && ItNew == ItEndNew; + if (!ItOld->isIdenticalTo(*ItNew)) + return false; + ++ItOld; + ++ItNew; + } + } + + /// Iterates over previously inserted restore blocks and returns the one that + /// can be reused. That is, the target can insert an unconditional branch + /// to it and fixupUnconditionalBranch can erase \p RestoreBB from + /// the function.
If no reusable restore block is found, inserts \p RestoreBB + /// into RestoreBlocks and returns it. + MachineBasicBlock *deduplicateRestoreBlock(MachineBasicBlock *RestoreBB) { + assert(!RestoreBB->empty() && + "RestoreBB to be deduplicated should not be empty"); + for (auto &DeduplicatedBB : RestoreBlocks) + if (isRestoreBlockIdentical(DeduplicatedBB, RestoreBB)) + return DeduplicatedBB; + + RestoreBlocks.push_back(RestoreBB); + return RestoreBB; + } }; SmallVector<BasicBlockInfo, 16> BlockInfo; @@ -480,42 +529,62 @@ MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back(), DestBB->getBasicBlock()); - TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL, - DestOffset - SrcOffset, RS.get()); + // For multiple far branches to the same destination, there are chances + // that some restore blocks could be shared if they clobber the same + // registers and share the same restore sequence. We allow the target to call + // DeduplicateRestoreBB to find a previously inserted restore block + // identical to RestoreBB and insert an unconditional branch to it, + // then we can erase RestoreBB from the function. + BasicBlockInfo &DestBBInfo = BlockInfo[DestBB->getNumber()]; + MachineBasicBlock *DeduplicatedBB = RestoreBB; + auto DeduplicateRestoreBB = [&DestBBInfo, + &DeduplicatedBB](MachineBasicBlock *RestoreBB) { + return DeduplicatedBB = DestBBInfo.deduplicateRestoreBlock(RestoreBB); + }; + + TII->insertIndirectBranch(*BranchBB, *DestBB, RestoreBB, DL, + DeduplicateRestoreBB, DestOffset - SrcOffset, + RS.get()); BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB); adjustBlockOffsets(*MBB); - // If RestoreBB is required, try to place just before DestBB. - if (!RestoreBB->empty()) { - // TODO: For multiple far branches to the same destination, there are - // chances that some restore blocks could be shared if they clobber the - // same registers and share the same restore sequence. So far, those - // restore blocks are just duplicated for each far branch.
- assert(!DestBB->isEntryBlock()); - MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator()); - if (auto *FT = PrevBB->getFallThrough()) { - assert(FT == DestBB); - TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc()); - // Recalculate the block size. - BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB); - } - // Now, RestoreBB could be placed directly before DestBB. - MF->splice(DestBB->getIterator(), RestoreBB->getIterator()); - // Update successors and predecessors. - RestoreBB->addSuccessor(DestBB); - BranchBB->replaceSuccessor(DestBB, RestoreBB); - if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeAndAddLiveIns(LiveRegs, *RestoreBB); - // Compute the restore block size. - BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB); - // Update the offset starting from the previous block. - adjustBlockOffsets(*PrevBB); - } else { - // Remove restore block if it's not required. + if (DeduplicatedBB != RestoreBB) + BranchBB->replaceSuccessor(DestBB, DeduplicatedBB); + if (DeduplicatedBB != RestoreBB || RestoreBB->empty()) { + // Remove restore block if it is deduplicated or is not required. MF->erase(RestoreBB); + return true; } + // Restore block should not contain any branch + for (auto &RestoreI : *RestoreBB) { + assert(!RestoreI.isBranch() && + "Restore blocks should not contain branch instruction"); + } + + // If RestoreBB is required, try to place just before DestBB. + + assert(!DestBB->isEntryBlock()); + MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator()); + if (auto *FT = PrevBB->getFallThrough()) { + assert(FT == DestBB); + TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc()); + // Recalculate the block size. + BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB); + } + // Now, RestoreBB could be placed directly before DestBB. + MF->splice(DestBB->getIterator(), RestoreBB->getIterator()); + // Update successors and predecessors. 
+ RestoreBB->addSuccessor(DestBB); + BranchBB->replaceSuccessor(DestBB, RestoreBB); + if (TRI->trackLivenessAfterRegAlloc(*MF)) + computeAndAddLiveIns(LiveRegs, *RestoreBB); + // Compute the restore block size. + BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB); + // Update the offset starting from the previous block. + adjustBlockOffsets(*PrevBB); + return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -277,10 +277,12 @@ MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; - void insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - MachineBasicBlock &RestoreBB, const DebugLoc &DL, - int64_t BrOffset, RegScavenger *RS) const override; + void + insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, + MachineBasicBlock *RestoreBB, const DebugLoc &DL, + function_ref<MachineBasicBlock *(MachineBasicBlock *)> + DeduplicateRestoreBB, + int64_t BrOffset, RegScavenger *RS) const override; bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2379,16 +2379,16 @@ return MI.getOperand(0).getMBB(); } -void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &DestBB, - MachineBasicBlock &RestoreBB, - const DebugLoc &DL, int64_t BrOffset, - RegScavenger *RS) const { +void SIInstrInfo::insertIndirectBranch( + MachineBasicBlock &MBB, MachineBasicBlock &DestBB, + MachineBasicBlock *RestoreBB, const DebugLoc &DL, + function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB, + int64_t BrOffset, RegScavenger *RS) const { assert(RS && "RegScavenger required for long branching"); assert(MBB.empty() && "new block should be inserted for expanding unconditional branch"); assert(MBB.pred_size() ==
1); - assert(RestoreBB.empty() && + assert(RestoreBB->empty() && "restore block should be inserted for restoring clobbered registers"); MachineFunction *MF = MBB.getParent(); @@ -2426,12 +2426,6 @@ BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64)) .addReg(PCReg); - // FIXME: If spilling is necessary, this will fail because this scavenger has - // no emergency stack slots. It is non-trivial to spill in this situation, - // because the restore code needs to be specially placed after the - // jump. BranchRelaxation then needs to be made aware of the newly inserted - // block. - // // If a spill is needed for the pc register pair, we need to insert a spill // restore block right before the destination block, and insert a short branch // into the old destination block's fallthrough predecessor. @@ -2462,6 +2456,8 @@ // dest_bb: // buzz; + const MachineBasicBlock *BranchDest; + RS->enterBasicBlockEnd(MBB); Register Scav = RS->scavengeRegisterBackwards( AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC), @@ -2470,20 +2466,23 @@ RS->setRegUsed(Scav); MRI.replaceRegWith(PCReg, Scav); MRI.clearVirtRegs(); + BranchDest = &DestBB; } else { // As SGPR needs VGPR to be spilled, we reuse the slot of temporary VGPR for // SGPR spill. const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); - TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS); + + TRI->spillEmergencySGPR(GetPC, *RestoreBB, AMDGPU::SGPR0_SGPR1, RS); + BranchDest = DeduplicateRestoreBB(RestoreBB); + MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1); MRI.clearVirtRegs(); } - MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol(); // Now, the distance could be defined. auto *Offset = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(DestLabel, MCCtx), + MCSymbolRefExpr::create(BranchDest->getSymbol(), MCCtx), MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx); // Add offset assignments.
auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx); diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h --- a/llvm/lib/Target/AVR/AVRInstrInfo.h +++ b/llvm/lib/Target/AVR/AVRInstrInfo.h @@ -107,10 +107,12 @@ bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; - void insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - MachineBasicBlock &RestoreBB, const DebugLoc &DL, - int64_t BrOffset, RegScavenger *RS) const override; + void + insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, + MachineBasicBlock *RestoreBB, const DebugLoc &DL, + function_ref<MachineBasicBlock *(MachineBasicBlock *)> + DeduplicateRestoreBB, + int64_t BrOffset, RegScavenger *RS) const override; private: const AVRRegisterInfo RI; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -555,11 +555,11 @@ } } -void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - MachineBasicBlock &RestoreBB, - const DebugLoc &DL, int64_t BrOffset, - RegScavenger *RS) const { +void AVRInstrInfo::insertIndirectBranch( + MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, + MachineBasicBlock *RestoreBB, const DebugLoc &DL, + function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB, + int64_t BrOffset, RegScavenger *RS) const { // This method inserts a *direct* branch (JMP), despite its name. // LLVM calls this method to fixup unconditional branches; it never calls // insertBranch or some hypothetical "insertDirectBranch".
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -94,10 +94,12 @@ const DebugLoc &dl, int *BytesAdded = nullptr) const override; - void insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - MachineBasicBlock &RestoreBB, const DebugLoc &DL, - int64_t BrOffset, RegScavenger *RS) const override; + void + insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, + MachineBasicBlock *RestoreBB, const DebugLoc &DL, + function_ref<MachineBasicBlock *(MachineBasicBlock *)> + DeduplicateRestoreBB, + int64_t BrOffset, RegScavenger *RS) const override; unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -949,16 +949,16 @@ return 2; } -void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &DestBB, - MachineBasicBlock &RestoreBB, - const DebugLoc &DL, int64_t BrOffset, - RegScavenger *RS) const { +void RISCVInstrInfo::insertIndirectBranch( + MachineBasicBlock &MBB, MachineBasicBlock &DestBB, + MachineBasicBlock *RestoreBB, const DebugLoc &DL, + function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB, + int64_t BrOffset, RegScavenger *RS) const { assert(RS && "RegScavenger required for long branching"); assert(MBB.empty() && "new block should be inserted for expanding unconditional branch"); assert(MBB.pred_size() == 1); - assert(RestoreBB.empty() && + assert(RestoreBB->empty() && "restore block should be inserted for restoring clobbered registers"); MachineFunction *MF = MBB.getParent(); @@ -975,7 +975,7 @@ // uses the same workaround). Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); auto II = MBB.end(); - // We may also update the jump target to RestoreBB later.
+ // We may also update the jump target later. MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump)) .addReg(ScratchReg, RegState::Define | RegState::Dead) .addMBB(&DestBB, RISCVII::MO_CALL); @@ -1002,12 +1002,13 @@ TRI->eliminateFrameIndex(std::prev(MI.getIterator()), /*SpAdj=*/0, /*FIOperandNum=*/1); - MI.getOperand(1).setMBB(&RestoreBB); - - loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex, + loadRegFromStackSlot(*RestoreBB, RestoreBB->end(), TmpGPR, FrameIndex, &RISCV::GPRRegClass, TRI); - TRI->eliminateFrameIndex(RestoreBB.back(), + TRI->eliminateFrameIndex(RestoreBB->back(), /*SpAdj=*/0, /*FIOperandNum=*/1); + + MachineBasicBlock *JumpDest = DeduplicateRestoreBB(RestoreBB); + MI.getOperand(1).setMBB(JumpDest); } MRI.replaceRegWith(ScratchReg, TmpGPR); diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -2378,8 +2378,8 @@ ; CHECK-NEXT: v_writelane_b32 v1, s1, 1 ; CHECK-NEXT: s_getpc_b64 s[0:1] ; CHECK-NEXT: .Lpost_getpc3: -; CHECK-NEXT: s_add_u32 s0, s0, (.LBB2_7-.Lpost_getpc3)&4294967295 -; CHECK-NEXT: s_addc_u32 s1, s1, (.LBB2_7-.Lpost_getpc3)>>32 +; CHECK-NEXT: s_add_u32 s0, s0, (.LBB2_5-.Lpost_getpc3)&4294967295 +; CHECK-NEXT: s_addc_u32 s1, s1, (.LBB2_5-.Lpost_getpc3)>>32 ; CHECK-NEXT: s_setpc_b64 s[0:1] ; CHECK-NEXT: .LBB2_2: ; %bb2 ; CHECK-NEXT: ;;#ASMSTART @@ -2422,12 +2422,6 @@ ; CHECK-NEXT: v_readlane_b32 s1, v1, 1 ; CHECK-NEXT: buffer_load_dword v1, off, s[96:99], 0 ; CHECK-NEXT: s_not_b64 exec, exec -; CHECK-NEXT: s_branch .LBB2_3 -; CHECK-NEXT: .LBB2_7: ; %bb3 -; CHECK-NEXT: v_readlane_b32 s0, v1, 0 -; CHECK-NEXT: v_readlane_b32 s1, v1, 1 -; CHECK-NEXT: buffer_load_dword v1, off, s[96:99], 0 -; CHECK-NEXT: s_not_b64 exec, exec ; CHECK-NEXT: .LBB2_3: ; %bb3 ; CHECK-NEXT: v_readlane_b32 s0, v0, 0 ; CHECK-NEXT: ;;#ASMSTART diff --git 
a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll --- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll +++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll @@ -1770,12 +1770,12 @@ ; CHECK-RV32-NEXT: bne t1, t2, .LBB7_3 ; CHECK-RV32-NEXT: # %bb.14: # %cond_3 ; CHECK-RV32-NEXT: sw s11, 0(sp) -; CHECK-RV32-NEXT: jump .LBB7_15, s11 +; CHECK-RV32-NEXT: jump .LBB7_11, s11 ; CHECK-RV32-NEXT: .LBB7_3: # %cond_4 ; CHECK-RV32-NEXT: bne s9, s10, .LBB7_4 ; CHECK-RV32-NEXT: # %bb.16: # %cond_4 ; CHECK-RV32-NEXT: sw s11, 0(sp) -; CHECK-RV32-NEXT: jump .LBB7_17, s11 +; CHECK-RV32-NEXT: jump .LBB7_13, s11 ; CHECK-RV32-NEXT: .LBB7_4: # %cond_5 ; CHECK-RV32-NEXT: bne s7, s8, .LBB7_5 ; CHECK-RV32-NEXT: # %bb.18: # %cond_5 @@ -1785,7 +1785,7 @@ ; CHECK-RV32-NEXT: bne s5, s6, .LBB7_6 ; CHECK-RV32-NEXT: # %bb.20: # %cond_6 ; CHECK-RV32-NEXT: sw s11, 0(sp) -; CHECK-RV32-NEXT: jump .LBB7_21, s11 +; CHECK-RV32-NEXT: jump .LBB7_11, s11 ; CHECK-RV32-NEXT: .LBB7_23: # %space ; CHECK-RV32-NEXT: lw s11, 0(sp) ; CHECK-RV32-NEXT: .LBB7_6: # %space @@ -1795,12 +1795,6 @@ ; CHECK-RV32-NEXT: j .LBB7_7 ; CHECK-RV32-NEXT: .LBB7_11: # %dest_1 ; CHECK-RV32-NEXT: lw s11, 0(sp) -; CHECK-RV32-NEXT: j .LBB7_7 -; CHECK-RV32-NEXT: .LBB7_15: # %dest_1 -; CHECK-RV32-NEXT: lw s11, 0(sp) -; CHECK-RV32-NEXT: j .LBB7_7 -; CHECK-RV32-NEXT: .LBB7_21: # %dest_1 -; CHECK-RV32-NEXT: lw s11, 0(sp) ; CHECK-RV32-NEXT: .LBB7_7: # %dest_1 ; CHECK-RV32-NEXT: #APP ; CHECK-RV32-NEXT: # dest 1 @@ -1808,9 +1802,6 @@ ; CHECK-RV32-NEXT: j .LBB7_8 ; CHECK-RV32-NEXT: .LBB7_13: # %dest_2 ; CHECK-RV32-NEXT: lw s11, 0(sp) -; CHECK-RV32-NEXT: j .LBB7_8 -; CHECK-RV32-NEXT: .LBB7_17: # %dest_2 -; CHECK-RV32-NEXT: lw s11, 0(sp) ; CHECK-RV32-NEXT: .LBB7_8: # %dest_2 ; CHECK-RV32-NEXT: #APP ; CHECK-RV32-NEXT: # dest 2