diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -583,15 +583,14 @@ } /// Insert an unconditional indirect branch at the end of \p MBB to \p - /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to + /// NewDestBB. Optionally, insert the clobbered register restoring in \p + /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to /// the offset of the position to insert the new branch. - /// - /// \returns The number of bytes added to the block. - virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - const DebugLoc &DL, - int64_t BrOffset = 0, - RegScavenger *RS = nullptr) const { + virtual void insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, + const DebugLoc &DL, int64_t BrOffset = 0, + RegScavenger *RS = nullptr) const { llvm_unreachable("target did not implement"); } diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -463,10 +463,62 @@ DebugLoc DL = MI.getDebugLoc(); MI.eraseFromParent(); - BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch( - *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get()); + // Create the optional restore block and, initially, place it at the end of + // function. That block will be placed later if it's used; otherwise, it will + // be erased. + MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back()); + + TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL, + DestOffset - SrcOffset, RS.get()); + + BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB); adjustBlockOffsets(*MBB); + + // If RestoreBB is required, try to place just before DestBB. 
+ if (!RestoreBB->empty()) { + // TODO: For multiple far branches to the same destination, there are + // chances that some restore blocks could be shared if they clobber the + // same registers and share the same restore sequence. So far, those + // restore blocks are just duplicated for each far branch. + if (DestBB->isEntryBlock()) { + // If DestBB is the entry block, create a new empty entry block falling + // through into DestBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(DestBB->getBasicBlock()); + MF->push_front(NewBB); + BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), + BasicBlockInfo()); + // So far, this new empty entry block has the correct blockinfo, i.e. + // both zero size and offset. + NewBB->addSuccessor(DestBB); + if (TRI->trackLivenessAfterRegAlloc(*MF)) + computeAndAddLiveIns(LiveRegs, *NewBB); + } + assert(!DestBB->isEntryBlock()); + MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator()); + if (auto *FT = PrevBB->getFallThrough()) { + assert(FT == DestBB); + TII->insertUnconditionalBranch(*PrevBB, DestBB, DebugLoc()); + // Recalculate the block size. + BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB); + } + // Now, RestoreBB could be placed directly before DestBB. + MF->splice(DestBB->getIterator(), RestoreBB->getIterator()); + // Update successors and predecessors. + RestoreBB->addSuccessor(DestBB); + BranchBB->replaceSuccessor(DestBB, RestoreBB); + if (TRI->trackLivenessAfterRegAlloc(*MF)) + computeAndAddLiveIns(LiveRegs, *RestoreBB); + // Compute the restore block size. + BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB); + // Update the offset starting from the previous block. + adjustBlockOffsets(*PrevBB); + } else { + // Remove restore block if it's not required. 
+ MF->erase(RestoreBB); + } + return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -271,11 +271,10 @@ MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; - unsigned insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - const DebugLoc &DL, - int64_t BrOffset, - RegScavenger *RS = nullptr) const override; + void insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, const DebugLoc &DL, + int64_t BrOffset, RegScavenger *RS) const override; bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2208,15 +2208,17 @@ return MI.getOperand(0).getMBB(); } -unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &DestBB, - const DebugLoc &DL, - int64_t BrOffset, - RegScavenger *RS) const { +void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &DestBB, + MachineBasicBlock &RestoreBB, + const DebugLoc &DL, int64_t BrOffset, + RegScavenger *RS) const { assert(RS && "RegScavenger required for long branching"); assert(MBB.empty() && "new block should be inserted for expanding unconditional branch"); assert(MBB.pred_size() == 1); + assert(RestoreBB.empty() && + "restore block should be inserted for restoring clobbered registers"); MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -2253,14 +2255,6 @@ BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64)) .addReg(PCReg); - auto ComputeBlockSize = [](const TargetInstrInfo *TII, - const MachineBasicBlock &MBB) { - unsigned Size = 0; - for (const MachineInstr &MI : MBB) - Size += 
TII->getInstSizeInBytes(MI); - return Size; - }; - // FIXME: If spilling is necessary, this will fail because this scavenger has // no emergency stack slots. It is non-trivial to spill in this situation, // because the restore code needs to be specially placed after the @@ -2299,22 +2293,34 @@ RS->enterBasicBlockEnd(MBB); Register Scav = RS->scavengeRegisterBackwards( - AMDGPU::SReg_64RegClass, - MachineBasicBlock::iterator(GetPC), false, 0); - MRI.replaceRegWith(PCReg, Scav); - MRI.clearVirtRegs(); - RS->setRegUsed(Scav); + AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC), + /* RestoreAfter */ false, 0, /* AllowSpill */ false); + if (Scav) { + RS->setRegUsed(Scav); + MRI.replaceRegWith(PCReg, Scav); + MRI.clearVirtRegs(); + } else { + // As SGPR needs VGPR to be spilled, we reuse the slot of temporary VGPR for + // SGPR spill. + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS); + MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1); + MRI.clearVirtRegs(); + } + MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol(); // Now, the distance could be defined. auto *Offset = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx), + MCSymbolRefExpr::create(DestLabel, MCCtx), MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx); // Add offset assignments. 
auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx); OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx)); auto *ShAmt = MCConstantExpr::create(32, MCCtx); OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx)); - return ComputeBlockSize(this, MBB); + + return; } unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -122,6 +122,10 @@ LiveIntervals *LIS = nullptr, bool OnlyToVGPR = false) const; + bool spillEmergencySGPR(MachineBasicBlock::iterator MI, + MachineBasicBlock &RestoreMBB, Register SGPR, + RegScavenger *RS) const; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -97,7 +97,7 @@ unsigned EltSize = 4; RegScavenger *RS; - MachineBasicBlock &MBB; + MachineBasicBlock *MBB; MachineFunction &MF; SIMachineFunctionInfo &MFI; const SIInstrInfo &TII; @@ -110,9 +110,14 @@ SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS) - : SuperReg(MI->getOperand(0).getReg()), MI(MI), - IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index), - RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()), + : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(), + MI->getOperand(0).isKill(), Index, RS) {} + + SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, + bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, + bool IsKill, int Index, RegScavenger *RS) + : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()), 
+ Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()), MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI), IsWave32(IsWave32) { const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg); @@ -189,8 +194,9 @@ if (SavedExecReg) { RS->setRegUsed(SavedExecReg); // Set exec to needed lanes - BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg); - auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes); + BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg); + auto I = + BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes); if (!TmpVGPRLive) I.addReg(TmpVGPR, RegState::ImplicitDefine); // Spill needed lanes @@ -201,7 +207,7 @@ TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false, /*IsKill*/ false); // Spill inactive lanes - auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); if (!TmpVGPRLive) I.addReg(TmpVGPR, RegState::ImplicitDefine); TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false); @@ -224,7 +230,7 @@ TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true, /*IsKill*/ false); // Restore exec - auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg) + auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg) .addReg(SavedExecReg, RegState::Kill); // Add an implicit use of the load so it is not dead. 
// FIXME This inserts an unnecessary waitcnt @@ -235,7 +241,7 @@ // Restore inactive lanes TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true, /*IsKill*/ false); - auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); if (!TmpVGPRLive) { I.addReg(TmpVGPR, RegState::ImplicitKill); } @@ -261,11 +267,17 @@ TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad, /*IsKill*/ false); // Spill inactive lanes - BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad); - BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); } } + + void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) { + assert(NewMBB->getParent() == &MF); + MI = NewMI; + MBB = NewMBB; + } }; } // namespace llvm @@ -1307,13 +1319,13 @@ if (IsLoad) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; - buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg, + buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg, Offset * SB.EltSize, MMO, SB.RS); } else { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; - buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg, - Offset * SB.EltSize, MMO, SB.RS); + buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, + FrameReg, Offset * SB.EltSize, MMO, SB.RS); // This only ever adds one VGPR spill SB.MFI.addToSpilledVGPRs(1); } @@ -1347,8 +1359,8 @@ // Mark the "old value of vgpr" input undef only if this is the first sgpr // spill to this specific vgpr in the first basic block. 
- auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), - Spill.VGPR) + auto MIB = BuildMI(*SB.MBB, MI, SB.DL, + SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR) .addReg(SubReg, getKillRegState(UseKill)) .addImm(Spill.Lane) .addReg(Spill.VGPR); @@ -1394,7 +1406,7 @@ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); MachineInstrBuilder WriteLane = - BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), + BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), SB.TmpVGPR) .addReg(SubReg, SubKillState) .addImm(i % PVD.PerVGPR) @@ -1456,10 +1468,10 @@ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; - auto MIB = - BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) - .addReg(Spill.VGPR) - .addImm(Spill.Lane); + auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), + SubReg) + .addReg(Spill.VGPR) + .addImm(Spill.Lane); if (SB.NumSubRegs > 1 && i == 0) MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); if (LIS) { @@ -1490,7 +1502,7 @@ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); bool LastSubReg = (i + 1 == e); - auto MIB = BuildMI(SB.MBB, MI, SB.DL, + auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) .addReg(SB.TmpVGPR, getKillRegState(LastSubReg)) .addImm(i); @@ -1516,6 +1528,75 @@ return true; } +bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, + MachineBasicBlock &RestoreMBB, + Register SGPR, RegScavenger *RS) const { + SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0, + RS); + SB.prepare(); + // Generate the spill of SGPR to SB.TmpVGPR. 
+ unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill); + auto PVD = SB.getPerVGPRData(); + for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { + unsigned TmpVGPRFlags = RegState::Undef; + // Write sub registers into the VGPR + for (unsigned i = Offset * PVD.PerVGPR, + e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); + i < e; ++i) { + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); + + MachineInstrBuilder WriteLane = + BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), + SB.TmpVGPR) + .addReg(SubReg, SubKillState) + .addImm(i % PVD.PerVGPR) + .addReg(SB.TmpVGPR, TmpVGPRFlags); + TmpVGPRFlags = 0; + // There could be undef components of a spilled super register. + // TODO: Can we detect this and skip the spill? + if (SB.NumSubRegs > 1) { + // The last implicit use of the SB.SuperReg carries the "Kill" flag. + unsigned SuperKillState = 0; + if (i + 1 == SB.NumSubRegs) + SuperKillState |= getKillRegState(SB.IsKill); + WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState); + } + } + // Don't need to write VGPR out. + } + + // Restore clobbered registers in the specified restore block. + MI = RestoreMBB.end(); + SB.setMI(&RestoreMBB, MI); + // Generate the restore of SGPR from SB.TmpVGPR. + for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { + // Don't need to load VGPR in. + // Unpack lanes + for (unsigned i = Offset * PVD.PerVGPR, + e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); + i < e; ++i) { + Register SubReg = + SB.NumSubRegs == 1 + ? 
SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); + bool LastSubReg = (i + 1 == e); + auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), + SubReg) + .addReg(SB.TmpVGPR, getKillRegState(LastSubReg)) + .addImm(i); + if (SB.NumSubRegs > 1 && i == 0) + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); + } + } + SB.restore(); + + SB.MFI.addToSpilledSGPRs(SB.NumSubRegs); + return false; +} + /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to /// a VGPR and the stack slot can be safely eliminated when all other users are /// handled. diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h --- a/llvm/lib/Target/AVR/AVRInstrInfo.h +++ b/llvm/lib/Target/AVR/AVRInstrInfo.h @@ -107,11 +107,11 @@ bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; - unsigned insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - const DebugLoc &DL, - int64_t BrOffset, - RegScavenger *RS) const override; + void insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, const DebugLoc &DL, + int64_t BrOffset, RegScavenger *RS) const override; + private: const AVRRegisterInfo RI; }; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -555,20 +555,19 @@ } } -unsigned AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - const DebugLoc &DL, - int64_t BrOffset, - RegScavenger *RS) const { - // This method inserts a *direct* branch (JMP), despite its name. - // LLVM calls this method to fixup unconditional branches; it never calls - // insertBranch or some hypothetical "insertDirectBranch". - // See lib/CodeGen/RegisterRelaxation.cpp for details. - // We end up here when a jump is too long for a RJMP instruction. 
- auto &MI = *BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB); - - return getInstSizeInBytes(MI); +void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, + const DebugLoc &DL, int64_t BrOffset, + RegScavenger *RS) const { + // This method inserts a *direct* branch (JMP), despite its name. + // LLVM calls this method to fixup unconditional branches; it never calls + // insertBranch or some hypothetical "insertDirectBranch". + // See lib/CodeGen/BranchRelaxation.cpp for details. + // We end up here when a jump is too long for a RJMP instruction. + BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB); + + return; } } // end of namespace llvm - diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -68,10 +68,10 @@ const DebugLoc &dl, int *BytesAdded = nullptr) const override; - unsigned insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - const DebugLoc &DL, int64_t BrOffset, - RegScavenger *RS = nullptr) const override; + void insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, const DebugLoc &DL, + int64_t BrOffset, RegScavenger *RS) const override; unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -641,11 +641,11 @@ return 2; } -unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &DestBB, - const DebugLoc &DL, - int64_t BrOffset, - RegScavenger *RS) const { +void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &DestBB, + MachineBasicBlock &RestoreBB, + const DebugLoc &DL, 
int64_t BrOffset, + RegScavenger *RS) const { assert(RS && "RegScavenger required for long branching"); assert(MBB.empty() && "new block should be inserted for expanding unconditional branch"); @@ -674,7 +674,8 @@ MRI.replaceRegWith(ScratchReg, Scav); MRI.clearVirtRegs(); RS->setRegUsed(Scav); - return 8; + // TODO: The case when there is no scavenged register needs special handling. + return; } bool RISCVInstrInfo::reverseBranchCondition( diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -1,11 +1,672 @@ -; RUN: not --crash llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s 2>&1 | FileCheck -check-prefix=FAIL %s - -; FIXME: This should be able to compile, but requires inserting an -; extra block to restore the scavenged register. - -; FAIL: LLVM ERROR: Error while trying to spill SGPR0_SGPR1 from class SReg_64: Cannot scavenge register without an emergency spill slot! 
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 -o - %s | FileCheck %s define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 { +; CHECK-LABEL: spill: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b32 s96, SCRATCH_RSRC_DWORD0 +; CHECK-NEXT: s_mov_b32 s97, SCRATCH_RSRC_DWORD1 +; CHECK-NEXT: s_load_dword s44, s[0:1], 0xb +; CHECK-NEXT: s_mov_b32 s98, -1 +; CHECK-NEXT: s_mov_b32 s99, 0xe8f000 +; CHECK-NEXT: s_add_u32 s96, s96, s3 +; CHECK-NEXT: s_addc_u32 s97, s97, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_cmp_eq_u32 s44, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s2, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s3, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s7, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s8, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s9, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s10, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s11, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s12, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s13, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s14, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s15, 0 +; CHECK-NEXT: 
;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s16, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s17, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s18, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s19, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s20, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s21, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s22, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s23, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s24, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s25, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s26, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s27, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s28, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s29, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s30, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s31, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s32, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s33, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s34, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s35, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s36, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s37, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s38, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s39, 
0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s40, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s41, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s42, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s43, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s44, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s45, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s46, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s47, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s48, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s49, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s50, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s51, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s52, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s53, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s54, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s55, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s56, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s57, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s58, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s59, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s60, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s61, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s62, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; 
CHECK-NEXT: s_mov_b32 s63, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s64, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s65, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s66, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s67, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s68, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s69, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s70, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s71, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s72, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s73, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s74, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s75, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s76, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s77, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s78, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s79, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s80, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s81, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s82, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s83, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s84, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s85, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s86, 0 +; CHECK-NEXT: ;;#ASMEND +; 
CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s87, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s88, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s89, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s90, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s91, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s92, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s93, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s94, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s95, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s96, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s97, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s98, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s99, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s100, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s101, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 vcc_lo, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 vcc_hi, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cbranch_scc0 BB0_1 +; CHECK-NEXT: BB0_3: ; %entry +; CHECK-NEXT: s_not_b64 exec, exec +; CHECK-NEXT: buffer_store_dword v0, off, s[96:99], 0 +; CHECK-NEXT: v_writelane_b32 v0, s0, 0 +; CHECK-NEXT: v_writelane_b32 v0, s1, 1 +; CHECK-NEXT: s_getpc_b64 s[0:1] +; CHECK-NEXT: .Lpost_getpc0: +; CHECK-NEXT: s_add_u32 s0, s0, (BB0_4-.Lpost_getpc0)&4294967295 +; CHECK-NEXT: s_addc_u32 s1, s1, (BB0_4-.Lpost_getpc0)>>32 +; CHECK-NEXT: s_setpc_b64 s[0:1] +; CHECK-NEXT: BB0_1: ; %bb2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: 
v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_branch BB0_2 +; CHECK-NEXT: BB0_4: ; %bb3 +; CHECK-NEXT: v_readlane_b32 s0, v0, 0 +; CHECK-NEXT: v_readlane_b32 s1, v0, 1 +; CHECK-NEXT: buffer_load_dword v0, off, s[96:99], 0 +; CHECK-NEXT: s_not_b64 exec, exec +; CHECK-NEXT: BB0_2: ; %bb3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s2 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s3 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s4 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s5 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s6 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s7 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s8 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s9 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s10 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s11 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s12 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s13 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s14 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s15 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s16 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s17 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s18 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART 
+; CHECK-NEXT: ; reg use s19 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s20 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s21 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s22 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s23 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s24 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s25 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s26 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s27 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s28 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s29 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s30 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s31 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s32 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s33 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s34 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s35 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s36 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s37 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s38 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s39 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s40 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s41 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s42 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s43 +; CHECK-NEXT: ;;#ASMEND 
+; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s44 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s45 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s46 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s47 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s48 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s49 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s50 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s51 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s52 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s53 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s54 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s55 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s56 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s57 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s58 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s59 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s60 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s61 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s62 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s63 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s64 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s65 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s66 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s67 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s68 
+; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s69 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s70 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s71 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s72 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s73 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s74 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s75 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s76 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s77 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s78 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s79 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s80 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s81 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s82 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s83 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s84 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s85 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s86 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s87 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s88 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s89 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s90 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s91 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s92 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; 
CHECK-NEXT: ; reg use s93 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s94 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s95 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s96 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s97 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s98 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s99 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s100 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s101 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use vcc_lo +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use vcc_hi +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm entry: %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0 @@ -114,10 +775,14 @@ %cmp = icmp eq i32 %cnd, 0 br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch -bb2: ; 28 bytes - ; 24 byte asm +bb2: ; 68 bytes + ; 64 byte asm call void asm sideeffect "v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64 v_nop_e64 v_nop_e64 v_nop_e64",""() #0