Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/Uniformity.h"
 #include "llvm/CodeGen/MIRFormatter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -602,13 +603,16 @@
   /// Insert an unconditional indirect branch at the end of \p MBB to \p
   /// NewDestBB. Optionally, insert the clobbered register restoring in \p
-  /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to
-  /// the offset of the position to insert the new branch.
-  virtual void insertIndirectBranch(MachineBasicBlock &MBB,
-                                    MachineBasicBlock &NewDestBB,
-                                    MachineBasicBlock &RestoreBB,
-                                    const DebugLoc &DL, int64_t BrOffset = 0,
-                                    RegScavenger *RS = nullptr) const {
+  /// RestoreBB. The target may optionally call \p DeduplicateRestoreBB
+  /// with \p RestoreBB after populating it and use the return value as the
+  /// deduplicated restore block. \p BrOffset indicates the offset of \p
+  /// NewDestBB relative to the offset of the position to insert the new branch.
+  virtual void
+  insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+                       MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+                       function_ref<MachineBasicBlock *(MachineBasicBlock *)>
+                           DeduplicateRestoreBB,
+                       int64_t BrOffset = 0, RegScavenger *RS = nullptr) const {
     llvm_unreachable("target did not implement");
   }
 
Index: llvm/lib/CodeGen/BranchRelaxation.cpp
===================================================================
--- llvm/lib/CodeGen/BranchRelaxation.cpp
+++ llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -8,6 +8,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -44,8 +45,7 @@
 namespace {
 
 class BranchRelaxation : public MachineFunctionPass {
-  /// BasicBlockInfo - Information about the offset and size of a single
-  /// basic block.
+  /// BasicBlockInfo - Information about a single basic block.
   struct BasicBlockInfo {
     /// Offset - Distance from the beginning of the function to the beginning
     /// of this basic block.
@@ -60,6 +60,13 @@
     /// beginning of the block, or from an aligned jump table at the end.
     unsigned Size = 0;
 
+    /// RestoreBlocks - Restore blocks for this basic block when this block
+    /// is a branch destination.
+    ///
+    /// This is used to deduplicate restore blocks. See deduplicateRestoreBlock
+    /// and fixupUnconditionalBranch for details.
+    TinyPtrVector<MachineBasicBlock *> RestoreBlocks;
+
     BasicBlockInfo() = default;
 
     /// Compute the offset immediately following this block. \p MBB is the next
     /// block.
@@ -75,6 +82,48 @@
       // can't tell whether or not it will insert nops. Assume that it will.
       return alignTo(PO, Alignment) + Alignment.value() - ParentAlign.value();
     }
+
+    /// Returns true if \p OldRestoreBB and \p NewRestoreBB are identical
+    /// restore blocks.
+    static bool isRestoreBlockIdentical(const MachineBasicBlock *OldRestoreBB,
+                                        const MachineBasicBlock *NewRestoreBB) {
+
+      auto ItNew = NewRestoreBB->begin();
+      auto ItNewEnd = NewRestoreBB->end();
+
+      for (auto &ItOld : *OldRestoreBB) {
+        // Since the target shouldn't place terminators in the restore block,
+        // the only possible terminator is an unconditional branch placed by
+        // fixupUnconditionalBranch when there are multiple different restore
+        // blocks. All but the last restore block get an unconditional branch
+        // to DestBB when they act as PrevBB. This branch doesn't take part in
+        // the check of whether two restore blocks are identical.
+        if (ItOld.isTerminator())
+          break;
+        // FIXME: consider the various flags on the block (e.g. alignment)
+        if (ItNew == ItNewEnd || !ItOld.isIdenticalTo(*ItNew))
+          return false;
+        ++ItNew;
+      }
+
+      return ItNew == ItNewEnd;
+    }
+
+    /// Iterate over previously inserted restore blocks and return one that
+    /// can be reused, i.e. the target can insert an unconditional branch
+    /// to it and fixupUnconditionalBranch can then erase \p RestoreBB from
+    /// the function. If no reusable restore block is found, add \p RestoreBB
+    /// to RestoreBlocks and return it.
+    MachineBasicBlock *deduplicateRestoreBlock(MachineBasicBlock *RestoreBB) {
+      assert(!RestoreBB->empty() &&
+             "RestoreBB to be deduplicated should not be empty");
+      for (auto &DeduplicatedBB : RestoreBlocks)
+        if (isRestoreBlockIdentical(DeduplicatedBB, RestoreBB))
+          return DeduplicatedBB;
+
+      RestoreBlocks.push_back(RestoreBB);
+      return RestoreBB;
+    }
   };
 
   SmallVector<BasicBlockInfo, 16> BlockInfo;
 
@@ -493,43 +542,62 @@
   MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back(),
                                                      DestBB->getBasicBlock());
 
-  TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
-                            DestOffset - SrcOffset, RS.get());
+  // For multiple far branches to the same destination, there are chances
+  // that some restore blocks could be shared if they clobber the same
+  // registers and share the same restore sequence. We allow the target to
+  // call DeduplicateRestoreBB to find a previously inserted restore block
+  // identical to RestoreBB and insert an unconditional branch to it, so
+  // that RestoreBB can then be erased from the function.
+  BasicBlockInfo &DestBBInfo = BlockInfo[DestBB->getNumber()];
+  MachineBasicBlock *DeduplicatedBB = RestoreBB;
+  auto DeduplicateRestoreBB = [&](MachineBasicBlock *RestoreBasicBlock) {
+    assert(RestoreBB == RestoreBasicBlock);
+    return DeduplicatedBB = DestBBInfo.deduplicateRestoreBlock(RestoreBB);
+  };
+
+  TII->insertIndirectBranch(*BranchBB, *DestBB, RestoreBB, DL,
+                            DeduplicateRestoreBB, DestOffset - SrcOffset,
+                            RS.get());
 
   BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
   adjustBlockOffsets(*MBB);
 
-  // If RestoreBB is required, try to place just before DestBB.
-  if (!RestoreBB->empty()) {
-    // TODO: For multiple far branches to the same destination, there are
-    // chances that some restore blocks could be shared if they clobber the
-    // same registers and share the same restore sequence. So far, those
-    // restore blocks are just duplicated for each far branch.
-    assert(!DestBB->isEntryBlock());
-    MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
-    // Fall through only if PrevBB has no unconditional branch as one of its
-    // terminators.
-    if (auto *FT = PrevBB->getLogicalFallThrough()) {
-      assert(FT == DestBB);
-      TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
-      BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
-    }
-    // Now, RestoreBB could be placed directly before DestBB.
-    MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
-    // Update successors and predecessors.
-    RestoreBB->addSuccessor(DestBB);
-    BranchBB->replaceSuccessor(DestBB, RestoreBB);
-    if (TRI->trackLivenessAfterRegAlloc(*MF))
-      computeAndAddLiveIns(LiveRegs, *RestoreBB);
-    // Compute the restore block size.
-    BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
-    // Update the offset starting from the previous block.
-    adjustBlockOffsets(*PrevBB);
-  } else {
-    // Remove restore block if it's not required.
+  if (DeduplicatedBB != RestoreBB)
+    BranchBB->replaceSuccessor(DestBB, DeduplicatedBB);
+  if (DeduplicatedBB != RestoreBB || RestoreBB->empty()) {
+    // Remove restore block if it is deduplicated or is not required.
     MF->erase(RestoreBB);
+    return true;
+  }
+
+  for (auto &RestoreI : *RestoreBB) {
+    assert(!RestoreI.isBranch() &&
+           "Restore blocks should not contain branch instructions");
   }
+
+  // If RestoreBB is required, try to place it just before DestBB.
+  assert(!DestBB->isEntryBlock());
+  MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+  // Fall through only if PrevBB has no unconditional branch as one of its
+  // terminators.
+  if (auto *FT = PrevBB->getLogicalFallThrough()) {
+    assert(FT == DestBB);
+    TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
+    BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+  }
+  // Now, RestoreBB could be placed directly before DestBB.
+  MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+  // Update successors and predecessors.
+  RestoreBB->addSuccessor(DestBB);
+  BranchBB->replaceSuccessor(DestBB, RestoreBB);
+  if (TRI->trackLivenessAfterRegAlloc(*MF))
+    computeAndAddLiveIns(LiveRegs, *RestoreBB);
+  // Compute the restore block size.
+  BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+  // Update the offset starting from the previous block.
+  adjustBlockOffsets(*PrevBB);
+  return true;
 }
 
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -280,10 +280,12 @@
   /// Note this only work before lowering the pseudo control flow instructions.
   bool hasDivergentBranch(const MachineBasicBlock *MBB) const;
 
-  void insertIndirectBranch(MachineBasicBlock &MBB,
-                            MachineBasicBlock &NewDestBB,
-                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
-                            int64_t BrOffset, RegScavenger *RS) const override;
+  void
+  insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+                       MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+                       function_ref<MachineBasicBlock *(MachineBasicBlock *)>
+                           DeduplicateRestoreBB,
+                       int64_t BrOffset, RegScavenger *RS) const override;
 
   bool analyzeBranchImpl(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I,
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2508,16 +2508,16 @@
   return false;
 }
 
-void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                       MachineBasicBlock &DestBB,
-                                       MachineBasicBlock &RestoreBB,
-                                       const DebugLoc &DL, int64_t BrOffset,
-                                       RegScavenger *RS) const {
+void SIInstrInfo::insertIndirectBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock &DestBB,
+    MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+    function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB,
+    int64_t BrOffset, RegScavenger *RS) const {
   assert(RS && "RegScavenger required for long branching");
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
   assert(MBB.pred_size() == 1);
-  assert(RestoreBB.empty() &&
+  assert(RestoreBB->empty() &&
          "restore block should be inserted for restoring clobbered registers");
 
   MachineFunction *MF = MBB.getParent();
@@ -2585,6 +2585,8 @@
   // dest_bb:
   //   buzz;
 
+  const MachineBasicBlock *BranchDest;
+
   RS->enterBasicBlockEnd(MBB);
   Register Scav = RS->scavengeRegisterBackwards(
       AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
@@ -2593,20 +2595,23 @@
     RS->setRegUsed(Scav);
     MRI.replaceRegWith(PCReg, Scav);
     MRI.clearVirtRegs();
+    BranchDest = &DestBB;
   } else {
     // As SGPR needs VGPR to be spilled, we reuse the slot of temporary VGPR for
     // SGPR spill.
     const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
     const SIRegisterInfo *TRI = ST.getRegisterInfo();
-    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
+
+    TRI->spillEmergencySGPR(GetPC, *RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
+    BranchDest = DeduplicateRestoreBB(RestoreBB);
+
     MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
     MRI.clearVirtRegs();
   }
 
-  MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol();
   // Now, the distance could be defined.
   auto *Offset = MCBinaryExpr::createSub(
-      MCSymbolRefExpr::create(DestLabel, MCCtx),
+      MCSymbolRefExpr::create(BranchDest->getSymbol(), MCCtx),
       MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx);
   // Add offset assignments.
   auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx);
Index: llvm/lib/Target/AVR/AVRInstrInfo.h
===================================================================
--- llvm/lib/Target/AVR/AVRInstrInfo.h
+++ llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -109,10 +109,12 @@
   bool isBranchOffsetInRange(unsigned BranchOpc,
                              int64_t BrOffset) const override;
 
-  void insertIndirectBranch(MachineBasicBlock &MBB,
-                            MachineBasicBlock &NewDestBB,
-                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
-                            int64_t BrOffset, RegScavenger *RS) const override;
+  void
+  insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+                       MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+                       function_ref<MachineBasicBlock *(MachineBasicBlock *)>
+                           DeduplicateRestoreBB,
+                       int64_t BrOffset, RegScavenger *RS) const override;
 
 private:
   const AVRRegisterInfo RI;
Index: llvm/lib/Target/AVR/AVRInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -554,11 +554,11 @@
   }
 }
 
-void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                        MachineBasicBlock &NewDestBB,
-                                        MachineBasicBlock &RestoreBB,
-                                        const DebugLoc &DL, int64_t BrOffset,
-                                        RegScavenger *RS) const {
+void AVRInstrInfo::insertIndirectBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+    MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+    function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB,
+    int64_t BrOffset, RegScavenger *RS) const {
   // This method inserts a *direct* branch (JMP), despite its name.
   // LLVM calls this method to fixup unconditional branches; it never calls
   // insertBranch or some hypothetical "insertDirectBranch".
Index: llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -68,10 +68,12 @@
                         const DebugLoc &dl,
                         int *BytesAdded = nullptr) const override;
 
-  void insertIndirectBranch(MachineBasicBlock &MBB,
-                            MachineBasicBlock &NewDestBB,
-                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
-                            int64_t BrOffset, RegScavenger *RS) const override;
+  void
+  insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+                       MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+                       function_ref<MachineBasicBlock *(MachineBasicBlock *)>
+                           DeduplicateRestoreBB,
+                       int64_t BrOffset, RegScavenger *RS) const override;
 
   bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
Index: llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -16,6 +16,7 @@
 #include "LoongArchRegisterInfo.h"
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "MCTargetDesc/LoongArchMatInt.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 
 using namespace llvm;
@@ -363,12 +364,11 @@
   return 2;
 }
 
-void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                              MachineBasicBlock &DestBB,
-                                              MachineBasicBlock &RestoreBB,
-                                              const DebugLoc &DL,
-                                              int64_t BrOffset,
-                                              RegScavenger *RS) const {
+void LoongArchInstrInfo::insertIndirectBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock &DestBB,
+    MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+    function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB,
+    int64_t BrOffset, RegScavenger *RS) const {
   assert(RS && "RegScavenger required for long branching");
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
@@ -417,12 +417,15 @@
                          &LoongArch::GPRRegClass, TRI, Register());
     TRI->eliminateFrameIndex(std::prev(PCALAU12I.getIterator()),
                              /*SpAdj=*/0, /*FIOperandNum=*/1);
-    PCALAU12I.getOperand(1).setMBB(&RestoreBB);
-    ADDI.getOperand(2).setMBB(&RestoreBB);
-    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), Scav, FrameIndex,
+
+    loadRegFromStackSlot(*RestoreBB, RestoreBB->end(), Scav, FrameIndex,
                          &LoongArch::GPRRegClass, TRI, Register());
-    TRI->eliminateFrameIndex(RestoreBB.back(),
+    TRI->eliminateFrameIndex(RestoreBB->back(),
                              /*SpAdj=*/0, /*FIOperandNum=*/1);
+
+    MachineBasicBlock *JumpDest = DeduplicateRestoreBB(RestoreBB);
+    PCALAU12I.getOperand(1).setMBB(JumpDest);
+    ADDI.getOperand(2).setMBB(JumpDest);
   }
   MRI.replaceRegWith(ScratchReg, Scav);
   MRI.clearVirtRegs();
Index: llvm/lib/Target/RISCV/RISCVInstrInfo.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -96,10 +96,12 @@
                         const DebugLoc &dl,
                         int *BytesAdded = nullptr) const override;
 
-  void insertIndirectBranch(MachineBasicBlock &MBB,
-                            MachineBasicBlock &NewDestBB,
-                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
-                            int64_t BrOffset, RegScavenger *RS) const override;
+  void
+  insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB,
+                       MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+                       function_ref<MachineBasicBlock *(MachineBasicBlock *)>
+                           DeduplicateRestoreBB,
+                       int64_t BrOffset, RegScavenger *RS) const override;
 
   unsigned removeBranch(MachineBasicBlock &MBB,
                         int *BytesRemoved = nullptr) const override;
Index: llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineCombinerPattern.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -951,16 +952,16 @@
   return 2;
 }
 
-void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                          MachineBasicBlock &DestBB,
-                                          MachineBasicBlock &RestoreBB,
-                                          const DebugLoc &DL, int64_t BrOffset,
-                                          RegScavenger *RS) const {
+void RISCVInstrInfo::insertIndirectBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock &DestBB,
+    MachineBasicBlock *RestoreBB, const DebugLoc &DL,
+    function_ref<MachineBasicBlock *(MachineBasicBlock *)> DeduplicateRestoreBB,
+    int64_t BrOffset, RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);
-  assert(RestoreBB.empty() &&
+  assert(RestoreBB->empty() &&
         "restore block should be inserted for restoring clobbered registers");
 
  MachineFunction *MF = MBB.getParent();
@@ -977,7 +978,7 @@
   // uses the same workaround).
   Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
   auto II = MBB.end();
-  // We may also update the jump target to RestoreBB later.
+  // We may also update the jump target later.
MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump)) .addReg(ScratchReg, RegState::Define | RegState::Dead) .addMBB(&DestBB, RISCVII::MO_CALL); @@ -1004,12 +1005,13 @@ TRI->eliminateFrameIndex(std::prev(MI.getIterator()), /*SpAdj=*/0, /*FIOperandNum=*/1); - MI.getOperand(1).setMBB(&RestoreBB); - - loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex, + loadRegFromStackSlot(*RestoreBB, RestoreBB->end(), TmpGPR, FrameIndex, &RISCV::GPRRegClass, TRI, Register()); - TRI->eliminateFrameIndex(RestoreBB.back(), + TRI->eliminateFrameIndex(RestoreBB->back(), /*SpAdj=*/0, /*FIOperandNum=*/1); + + MachineBasicBlock *JumpDest = DeduplicateRestoreBB(RestoreBB); + MI.getOperand(1).setMBB(JumpDest); } MRI.replaceRegWith(ScratchReg, TmpGPR); Index: llvm/test/CodeGen/AMDGPU/branch-relax-spill-deduplication.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/branch-relax-spill-deduplication.ll +++ llvm/test/CodeGen/AMDGPU/branch-relax-spill-deduplication.ll @@ -346,8 +346,8 @@ ; CHECK-NEXT: v_writelane_b32 v1, s1, 1 ; CHECK-NEXT: s_getpc_b64 s[0:1] ; CHECK-NEXT: .Lpost_getpc1: -; CHECK-NEXT: s_add_u32 s0, s0, (.LBB0_7-.Lpost_getpc1)&4294967295 -; CHECK-NEXT: s_addc_u32 s1, s1, (.LBB0_7-.Lpost_getpc1)>>32 +; CHECK-NEXT: s_add_u32 s0, s0, (.LBB0_5-.Lpost_getpc1)&4294967295 +; CHECK-NEXT: s_addc_u32 s1, s1, (.LBB0_5-.Lpost_getpc1)>>32 ; CHECK-NEXT: s_setpc_b64 s[0:1] ; CHECK-NEXT: .LBB0_2: ; %bb2 ; CHECK-NEXT: ;;#ASMSTART @@ -390,12 +390,6 @@ ; CHECK-NEXT: v_readlane_b32 s1, v1, 1 ; CHECK-NEXT: buffer_load_dword v1, off, s[96:99], 0 ; CHECK-NEXT: s_not_b64 exec, exec -; CHECK-NEXT: s_branch .LBB0_3 -; CHECK-NEXT: .LBB0_7: ; %bb3 -; CHECK-NEXT: v_readlane_b32 s0, v1, 0 -; CHECK-NEXT: v_readlane_b32 s1, v1, 1 -; CHECK-NEXT: buffer_load_dword v1, off, s[96:99], 0 -; CHECK-NEXT: s_not_b64 exec, exec ; CHECK-NEXT: .LBB0_3: ; %bb3 ; CHECK-NEXT: v_readlane_b32 s0, v0, 0 ; CHECK-NEXT: ;;#ASMSTART @@ -973,7 +967,7 @@ ; CHECK-LABEL: spill_func_duplicated_restore_block: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] @@ -1049,10 +1043,10 @@ ; CHECK-NEXT: v_writelane_b32 v1, s100, 5 ; CHECK-NEXT: v_writelane_b32 v0, s93, 62 ; CHECK-NEXT: v_writelane_b32 v1, s101, 6 -; CHECK-NEXT: s_mov_b32 s34, s12 +; CHECK-NEXT: s_mov_b32 s49, s12 ; CHECK-NEXT: v_writelane_b32 v0, s94, 63 -; CHECK-NEXT: v_writelane_b32 v1, s34, 7 -; CHECK-NEXT: s_cmp_eq_u32 s34, 42 +; CHECK-NEXT: v_writelane_b32 v1, s49, 7 +; CHECK-NEXT: s_cmp_eq_u32 s49, 42 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: ;;#ASMEND @@ -2018,7 +2012,7 @@ ; CHECK-NEXT: v_readlane_b32 s33, v0, 2 ; CHECK-NEXT: v_readlane_b32 s31, v0, 1 ; CHECK-NEXT: v_readlane_b32 s30, v0, 0 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] Index: llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll =================================================================== --- llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll +++ 
llvm/test/CodeGen/LoongArch/branch-relaxation-spill-32.ll @@ -447,15 +447,15 @@ ; CHECK-NEXT: bne $s3, $s4, .LBB1_3 ; CHECK-NEXT: # %bb.14: # %cond_3 ; CHECK-NEXT: st.w $t8, $sp, 0 -; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_15) -; CHECK-NEXT: addi.w $t8, $t8, %pc_lo12(.LBB1_15) +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_11) +; CHECK-NEXT: addi.w $t8, $t8, %pc_lo12(.LBB1_11) ; CHECK-NEXT: jr $t8 ; CHECK-NEXT: .LBB1_3: # %cond_4 ; CHECK-NEXT: bne $s1, $s2, .LBB1_4 ; CHECK-NEXT: # %bb.16: # %cond_4 ; CHECK-NEXT: st.w $t8, $sp, 0 -; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_17) -; CHECK-NEXT: addi.w $t8, $t8, %pc_lo12(.LBB1_17) +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_13) +; CHECK-NEXT: addi.w $t8, $t8, %pc_lo12(.LBB1_13) ; CHECK-NEXT: jr $t8 ; CHECK-NEXT: .LBB1_4: # %cond_5 ; CHECK-NEXT: bne $fp, $s0, .LBB1_5 @@ -468,8 +468,8 @@ ; CHECK-NEXT: bne $t7, $t8, .LBB1_6 ; CHECK-NEXT: # %bb.20: # %cond_6 ; CHECK-NEXT: st.w $t8, $sp, 0 -; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_21) -; CHECK-NEXT: addi.w $t8, $t8, %pc_lo12(.LBB1_21) +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_11) +; CHECK-NEXT: addi.w $t8, $t8, %pc_lo12(.LBB1_11) ; CHECK-NEXT: jr $t8 ; CHECK-NEXT: .LBB1_6: # %space ; CHECK-NEXT: #APP @@ -478,12 +478,6 @@ ; CHECK-NEXT: b .LBB1_7 ; CHECK-NEXT: .LBB1_11: # %dest_1 ; CHECK-NEXT: ld.w $t8, $sp, 0 -; CHECK-NEXT: b .LBB1_7 -; CHECK-NEXT: .LBB1_15: # %dest_1 -; CHECK-NEXT: ld.w $t8, $sp, 0 -; CHECK-NEXT: b .LBB1_7 -; CHECK-NEXT: .LBB1_21: # %dest_1 -; CHECK-NEXT: ld.w $t8, $sp, 0 ; CHECK-NEXT: .LBB1_7: # %dest_1 ; CHECK-NEXT: #APP ; CHECK-NEXT: # dest 1 @@ -491,9 +485,6 @@ ; CHECK-NEXT: b .LBB1_8 ; CHECK-NEXT: .LBB1_13: # %dest_2 ; CHECK-NEXT: ld.w $t8, $sp, 0 -; CHECK-NEXT: b .LBB1_8 -; CHECK-NEXT: .LBB1_17: # %dest_2 -; CHECK-NEXT: ld.w $t8, $sp, 0 ; CHECK-NEXT: .LBB1_8: # %dest_2 ; CHECK-NEXT: #APP ; CHECK-NEXT: # dest 2 Index: llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll =================================================================== --- llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll +++ llvm/test/CodeGen/LoongArch/branch-relaxation-spill-64.ll @@ -447,15 +447,15 @@ ; CHECK-NEXT: bne $s3, $s4, .LBB1_3 ; CHECK-NEXT: # %bb.14: # %cond_3 ; CHECK-NEXT: st.d $t8, $sp, 0 -; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_15) -; CHECK-NEXT: addi.d $t8, $t8, %pc_lo12(.LBB1_15) +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_11) +; CHECK-NEXT: addi.d $t8, $t8, %pc_lo12(.LBB1_11) ; CHECK-NEXT: jr $t8 ; CHECK-NEXT: .LBB1_3: # %cond_4 ; CHECK-NEXT: bne $s1, $s2, .LBB1_4 ; CHECK-NEXT: # %bb.16: # %cond_4 ; CHECK-NEXT: st.d $t8, $sp, 0 -; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_17) -; CHECK-NEXT: addi.d $t8, $t8, %pc_lo12(.LBB1_17) +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_13) +; CHECK-NEXT: addi.d $t8, $t8, %pc_lo12(.LBB1_13) ; CHECK-NEXT: jr $t8 ; CHECK-NEXT: .LBB1_4: # %cond_5 ; CHECK-NEXT: bne $fp, $s0, .LBB1_5 @@ -468,8 +468,8 @@ ; CHECK-NEXT: bne $t7, $t8, .LBB1_6 ; CHECK-NEXT: # %bb.20: # %cond_6 ; CHECK-NEXT: st.d $t8, $sp, 0 -; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_21) -; CHECK-NEXT: addi.d $t8, $t8, %pc_lo12(.LBB1_21) +; CHECK-NEXT: pcalau12i $t8, %pc_hi20(.LBB1_11) +; CHECK-NEXT: addi.d $t8, $t8, %pc_lo12(.LBB1_11) ; CHECK-NEXT: jr $t8 ; CHECK-NEXT: .LBB1_6: # %space ; CHECK-NEXT: #APP @@ -478,12 +478,6 @@ ; CHECK-NEXT: b .LBB1_7 ; CHECK-NEXT: .LBB1_11: # %dest_1 ; CHECK-NEXT: ld.d $t8, $sp, 0 -; CHECK-NEXT: b .LBB1_7 -; CHECK-NEXT: .LBB1_15: # %dest_1 -; CHECK-NEXT: ld.d $t8, $sp, 0 -; CHECK-NEXT: b .LBB1_7 -; CHECK-NEXT: .LBB1_21: # %dest_1 -; 
CHECK-NEXT: ld.d $t8, $sp, 0 ; CHECK-NEXT: .LBB1_7: # %dest_1 ; CHECK-NEXT: #APP ; CHECK-NEXT: # dest 1 @@ -491,9 +485,6 @@ ; CHECK-NEXT: b .LBB1_8 ; CHECK-NEXT: .LBB1_13: # %dest_2 ; CHECK-NEXT: ld.d $t8, $sp, 0 -; CHECK-NEXT: b .LBB1_8 -; CHECK-NEXT: .LBB1_17: # %dest_2 -; CHECK-NEXT: ld.d $t8, $sp, 0 ; CHECK-NEXT: .LBB1_8: # %dest_2 ; CHECK-NEXT: #APP ; CHECK-NEXT: # dest 2 Index: llvm/test/CodeGen/RISCV/branch-relaxation.ll =================================================================== --- llvm/test/CodeGen/RISCV/branch-relaxation.ll +++ llvm/test/CodeGen/RISCV/branch-relaxation.ll @@ -3375,12 +3375,12 @@ ; CHECK-RV32-NEXT: bne t1, t2, .LBB7_3 ; CHECK-RV32-NEXT: # %bb.14: # %cond_3 ; CHECK-RV32-NEXT: sw s11, 0(sp) -; CHECK-RV32-NEXT: jump .LBB7_15, s11 +; CHECK-RV32-NEXT: jump .LBB7_11, s11 ; CHECK-RV32-NEXT: .LBB7_3: # %cond_4 ; CHECK-RV32-NEXT: bne s9, s10, .LBB7_4 ; CHECK-RV32-NEXT: # %bb.16: # %cond_4 ; CHECK-RV32-NEXT: sw s11, 0(sp) -; CHECK-RV32-NEXT: jump .LBB7_17, s11 +; CHECK-RV32-NEXT: jump .LBB7_13, s11 ; CHECK-RV32-NEXT: .LBB7_4: # %cond_5 ; CHECK-RV32-NEXT: bne s7, s8, .LBB7_5 ; CHECK-RV32-NEXT: # %bb.18: # %cond_5 @@ -3390,7 +3390,7 @@ ; CHECK-RV32-NEXT: bne s5, s6, .LBB7_6 ; CHECK-RV32-NEXT: # %bb.20: # %cond_6 ; CHECK-RV32-NEXT: sw s11, 0(sp) -; CHECK-RV32-NEXT: jump .LBB7_21, s11 +; CHECK-RV32-NEXT: jump .LBB7_11, s11 ; CHECK-RV32-NEXT: .LBB7_23: # %space ; CHECK-RV32-NEXT: lw s11, 0(sp) ; CHECK-RV32-NEXT: .LBB7_6: # %space @@ -3400,12 +3400,6 @@ ; CHECK-RV32-NEXT: j .LBB7_7 ; CHECK-RV32-NEXT: .LBB7_11: # %dest_1 ; CHECK-RV32-NEXT: lw s11, 0(sp) -; CHECK-RV32-NEXT: j .LBB7_7 -; CHECK-RV32-NEXT: .LBB7_15: # %dest_1 -; CHECK-RV32-NEXT: lw s11, 0(sp) -; CHECK-RV32-NEXT: j .LBB7_7 -; CHECK-RV32-NEXT: .LBB7_21: # %dest_1 -; CHECK-RV32-NEXT: lw s11, 0(sp) ; CHECK-RV32-NEXT: .LBB7_7: # %dest_1 ; CHECK-RV32-NEXT: #APP ; CHECK-RV32-NEXT: # dest 1 @@ -3413,9 +3407,6 @@ ; CHECK-RV32-NEXT: j .LBB7_8 ; CHECK-RV32-NEXT: .LBB7_13: # %dest_2 ; CHECK-RV32-NEXT: lw s11, 0(sp) -; CHECK-RV32-NEXT: j .LBB7_8 -; CHECK-RV32-NEXT: .LBB7_17: # %dest_2 -; CHECK-RV32-NEXT: lw s11, 0(sp) ; CHECK-RV32-NEXT: .LBB7_8: # %dest_2 ; CHECK-RV32-NEXT: #APP ; CHECK-RV32-NEXT: # dest 2
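
Reviewer note, not part of the patch: the contract introduced above is "the target builds the restore sequence into RestoreBB, then asks BranchRelaxation via DeduplicateRestoreBB whether an identical restore block already exists, and branches to whatever block the callback returns". The self-contained C++ sketch below models that contract with plain standard-library types so it can be compiled and run on its own; RestoreBlock, DestInfo, insertLongBranch, and the example strings are hypothetical stand-ins, not LLVM API.

// Standalone sketch of the restore-block deduplication scheme, assuming a
// restore block can be modeled as a plain sequence of instruction strings.
#include <cassert>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

using RestoreBlock = std::vector<std::string>; // stand-in for MachineBasicBlock

// Per-destination cache of previously inserted restore blocks, playing the
// role of BasicBlockInfo::RestoreBlocks / deduplicateRestoreBlock.
struct DestInfo {
  std::vector<const RestoreBlock *> RestoreBlocks;

  const RestoreBlock *deduplicate(RestoreBlock *RB) {
    assert(!RB->empty() && "restore block to deduplicate should not be empty");
    for (const RestoreBlock *Seen : RestoreBlocks)
      if (*Seen == *RB) // the patch compares MachineInstrs with isIdenticalTo
        return Seen;
    RestoreBlocks.push_back(RB);
    return RB;
  }
};

// Plays the role of a target's insertIndirectBranch: fill RB with the restore
// sequence, then let the callback pick the actual branch destination.
const RestoreBlock *
insertLongBranch(RestoreBlock *RB,
                 std::function<const RestoreBlock *(RestoreBlock *)> Dedup) {
  RB->push_back("reload scratch reg from spill slot");
  return Dedup(RB); // either RB itself or an identical, earlier restore block
}

int main() {
  DestInfo Dest;
  RestoreBlock A, B;
  auto Callback = [&](RestoreBlock *RB) { return Dest.deduplicate(RB); };

  const RestoreBlock *D1 = insertLongBranch(&A, Callback);
  const RestoreBlock *D2 = insertLongBranch(&B, Callback);

  // The second far branch reuses the first restore block, so B (like the
  // redundant .LBB*_15/.LBB*_17/.LBB*_21 blocks in the tests) can be dropped.
  std::cout << (D1 == D2 ? "deduplicated\n" : "distinct\n");
  return 0;
}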