diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -582,15 +582,14 @@
   }

   /// Insert an unconditional indirect branch at the end of \p MBB to \p
-  /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to
+  /// NewDestBB. Optionally, insert code to restore clobbered registers in
+  /// \p RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to
   /// the offset of the position to insert the new branch.
-  ///
-  /// \returns The number of bytes added to the block.
-  virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                        MachineBasicBlock &NewDestBB,
-                                        const DebugLoc &DL,
-                                        int64_t BrOffset = 0,
-                                        RegScavenger *RS = nullptr) const {
+  virtual void insertIndirectBranch(MachineBasicBlock &MBB,
+                                    MachineBasicBlock &NewDestBB,
+                                    MachineBasicBlock &RestoreBB,
+                                    const DebugLoc &DL, int64_t BrOffset = 0,
+                                    RegScavenger *RS = nullptr) const {
     llvm_unreachable("target did not implement");
   }

diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -463,10 +463,48 @@
   DebugLoc DL = MI.getDebugLoc();
   MI.eraseFromParent();

-  BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch(
-      *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get());
+  // Create the optional restore block and, initially, place it at the end of
+  // the function. The block will be moved into place later if it is used;
+  // otherwise, it will be erased.
+  MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back());
+
+  TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
+                            DestOffset - SrcOffset, RS.get());
+
+  BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
   adjustBlockOffsets(*MBB);
+
+  // If RestoreBB is required, try to place it just before DestBB.
+  if (!RestoreBB->empty()) {
+    // TODO: For multiple far branches to the same destination, the restore
+    // blocks could be shared if they clobber the same registers and use the
+    // same restore sequence. For now, a restore block is duplicated for each
+    // far branch.
+    assert(!DestBB->isEntryBlock());
+    MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+    if (auto *FT = PrevBB->getFallThrough()) {
+      assert(FT == DestBB);
+      TII->insertUnconditionalBranch(*PrevBB, DestBB, DebugLoc());
+      // Recalculate the block size.
+      BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+    }
+    // Now RestoreBB can be placed directly before DestBB.
+    MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+    // Update successors and predecessors.
+    RestoreBB->addSuccessor(DestBB);
+    BranchBB->replaceSuccessor(DestBB, RestoreBB);
+    if (TRI->trackLivenessAfterRegAlloc(*MF))
+      computeAndAddLiveIns(LiveRegs, *RestoreBB);
+    // Compute the restore block size.
+    BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+    // Update the offsets starting from the previous block.
+    adjustBlockOffsets(*PrevBB);
+  } else {
+    // Remove the restore block if it is not required.
+    MF->erase(RestoreBB);
+  }

   return true;
 }
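Read together, the two hunks above define the new contract: BranchRelaxation hands insertIndirectBranch a fresh, empty RestoreBB, and a target that has to clobber a live register to reach the destination emits the restore sequence there. A minimal sketch of an override under this contract, for a hypothetical target (the MYTGT opcodes, SAVE/RELOAD pseudos, and R12 scratch register are invented for illustration, not a real API):

// Sketch only: MYTGT::* names are hypothetical.
void MyTargetInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                             MachineBasicBlock &NewDestBB,
                                             MachineBasicBlock &RestoreBB,
                                             const DebugLoc &DL,
                                             int64_t BrOffset,
                                             RegScavenger *RS) const {
  // Try to find a free register first; spilling is not allowed here.
  RS->enterBasicBlockEnd(MBB);
  Register Scratch =
      RS->scavengeRegisterBackwards(MYTGT::GPRRegClass, MBB.end(),
                                    /*RestoreAfter=*/false, /*SPAdj=*/0,
                                    /*AllowSpill=*/false);
  bool Clobbered = false;
  if (!Scratch) {
    // Nothing is free: clobber a fixed register and emit its restore into
    // RestoreBB. BranchRelaxation splices RestoreBB in front of the
    // destination, so the restore runs before any use of the register.
    Clobbered = true;
    Scratch = MYTGT::R12;
    BuildMI(&MBB, DL, get(MYTGT::SAVE_R12));         // stash R12
    BuildMI(&RestoreBB, DL, get(MYTGT::RELOAD_R12)); // and bring it back
  }
  // If we clobbered, branch to RestoreBB; it falls through to NewDestBB.
  MachineBasicBlock *Target = Clobbered ? &RestoreBB : &NewDestBB;
  BuildMI(&MBB, DL, get(MYTGT::LoadAddress), Scratch).addMBB(Target);
  BuildMI(&MBB, DL, get(MYTGT::JumpReg)).addReg(Scratch, RegState::Kill);
}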
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -275,11 +275,10 @@
   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

-  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                MachineBasicBlock &NewDestBB,
-                                const DebugLoc &DL,
-                                int64_t BrOffset,
-                                RegScavenger *RS = nullptr) const override;
+  void insertIndirectBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock &NewDestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset, RegScavenger *RS) const override;

   bool analyzeBranchImpl(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I,

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2223,15 +2223,17 @@
   return MI.getOperand(0).getMBB();
 }

-unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                           MachineBasicBlock &DestBB,
-                                           const DebugLoc &DL,
-                                           int64_t BrOffset,
-                                           RegScavenger *RS) const {
+void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                       MachineBasicBlock &DestBB,
+                                       MachineBasicBlock &RestoreBB,
+                                       const DebugLoc &DL, int64_t BrOffset,
+                                       RegScavenger *RS) const {
   assert(RS && "RegScavenger required for long branching");
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
   assert(MBB.pred_size() == 1);
+  assert(RestoreBB.empty() &&
+         "restore block should be inserted for restoring clobbered registers");

   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -2268,14 +2270,6 @@
   BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
       .addReg(PCReg);

-  auto ComputeBlockSize = [](const TargetInstrInfo *TII,
-                             const MachineBasicBlock &MBB) {
-    unsigned Size = 0;
-    for (const MachineInstr &MI : MBB)
-      Size += TII->getInstSizeInBytes(MI);
-    return Size;
-  };
-
   // FIXME: If spilling is necessary, this will fail because this scavenger has
   // no emergency stack slots. It is non-trivial to spill in this situation,
   // because the restore code needs to be specially placed after the
@@ -2314,22 +2308,34 @@
   RS->enterBasicBlockEnd(MBB);
   Register Scav = RS->scavengeRegisterBackwards(
-      AMDGPU::SReg_64RegClass,
-      MachineBasicBlock::iterator(GetPC), false, 0);
-  MRI.replaceRegWith(PCReg, Scav);
-  MRI.clearVirtRegs();
-  RS->setRegUsed(Scav);
+      AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
+      /* RestoreAfter */ false, 0, /* AllowSpill */ false);
+  if (Scav) {
+    RS->setRegUsed(Scav);
+    MRI.replaceRegWith(PCReg, Scav);
+    MRI.clearVirtRegs();
+  } else {
+    // Spilling an SGPR requires a VGPR, so reuse the temporary VGPR's slot
+    // for the SGPR spill.
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
+    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
+    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
+    MRI.clearVirtRegs();
+  }
+
+  MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol();
   // Now, the distance could be defined.
   auto *Offset = MCBinaryExpr::createSub(
-      MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx),
+      MCSymbolRefExpr::create(DestLabel, MCCtx),
       MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx);
   // Add offset assignments.
   auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx);
   OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx));
   auto *ShAmt = MCConstantExpr::create(32, MCCtx);
   OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx));
-
-  return ComputeBlockSize(this, MBB);
+
+  return;
 }

 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
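The OffsetLo/OffsetHi fixups above encode a 64-bit add of a label difference: s_add_u32 takes the low 32 bits of DestLabel - PostGetPCLabel, and s_addc_u32 takes the arithmetically shifted high 32 bits plus the carry (the 4294967295 mask and >>32 reappear verbatim in the regenerated test checks below). The arithmetic can be verified in plain C++; the sketch below only illustrates the split and is not LLVM code:

#include <cassert>
#include <cstdint>

// The signed byte distance Dist = DestLabel - PostGetPCLabel is added to
// the 64-bit PC in two halves. The arithmetic shift Dist >> 32 gives a
// sign-extended high word for backward branches, and s_addc_u32 consumes
// the carry of the low add.
int main() {
  int64_t Dist = -72;    // e.g. a backward far branch
  uint64_t PC = 0x1000;  // value produced by s_getpc_b64
  uint32_t Lo = uint32_t(uint64_t(Dist) & 0xffffffff); // s_add_u32 operand
  uint32_t Hi = uint32_t(Dist >> 32);                  // s_addc_u32 operand
  uint64_t LoSum = uint64_t(uint32_t(PC)) + Lo;
  uint32_t Carry = uint32_t(LoSum >> 32);
  uint32_t HiSum = uint32_t(PC >> 32) + Hi + Carry;
  uint64_t NewPC = (uint64_t(HiSum) << 32) | uint32_t(LoSum);
  assert(NewPC == uint64_t(PC + Dist));
  return 0;
}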
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -130,6 +130,10 @@
                  LiveIntervals *LIS = nullptr,
                  bool OnlyToVGPR = false) const;

+  bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
+                          MachineBasicBlock &RestoreMBB, Register SGPR,
+                          RegScavenger *RS) const;
+
   void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS) const override;

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -97,7 +97,7 @@
   unsigned EltSize = 4;

   RegScavenger *RS;
-  MachineBasicBlock &MBB;
+  MachineBasicBlock *MBB;
   MachineFunction &MF;
   SIMachineFunctionInfo &MFI;
   const SIInstrInfo &TII;
@@ -110,9 +110,14 @@
   SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                    bool IsWave32, MachineBasicBlock::iterator MI, int Index,
                    RegScavenger *RS)
-      : SuperReg(MI->getOperand(0).getReg()), MI(MI),
-        IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
-        RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
+      : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
+                         MI->getOperand(0).isKill(), Index, RS) {}
+
+  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
+                   bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
+                   bool IsKill, int Index, RegScavenger *RS)
+      : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
+        Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
         MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
         IsWave32(IsWave32) {
     const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
@@ -189,8 +194,9 @@
     if (SavedExecReg) {
       RS->setRegUsed(SavedExecReg);
       // Set exec to needed lanes
-      BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
-      auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
+      BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
+      auto I =
+          BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
       if (!TmpVGPRLive)
         I.addReg(TmpVGPR, RegState::ImplicitDefine);
       // Spill needed lanes
@@ -201,7 +207,7 @@
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
                                   /*IsKill*/ false);
       // Spill inactive lanes
-      auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       if (!TmpVGPRLive)
         I.addReg(TmpVGPR, RegState::ImplicitDefine);
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
@@ -224,7 +230,7 @@
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                   /*IsKill*/ false);
       // Restore exec
-      auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
+      auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
                    .addReg(SavedExecReg, RegState::Kill);
       // Add an implicit use of the load so it is not dead.
       // FIXME: This inserts an unnecessary waitcnt.
@@ -235,7 +241,7 @@
       // Restore inactive lanes
       TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                   /*IsKill*/ false);
-      auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       if (!TmpVGPRLive) {
         I.addReg(TmpVGPR, RegState::ImplicitKill);
       }
@@ -261,11 +267,17 @@
       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
                                   /*IsKill*/ false);
       // Spill inactive lanes
-      BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
       TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
-      BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+      BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
     }
   }
+
+  void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
+    assert(MBB->getParent() == &MF);
+    MI = NewMI;
+    MBB = NewMBB;
+  }
 };

 } // namespace llvm

@@ -1337,13 +1349,13 @@
   if (IsLoad) {
     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                           : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
-    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
+    buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
                         Offset * SB.EltSize, MMO, SB.RS);
   } else {
     unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                           : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
-    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
-                        Offset * SB.EltSize, MMO, SB.RS);
+    buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill,
+                        FrameReg, Offset * SB.EltSize, MMO, SB.RS);
     // This only ever adds one VGPR spill.
     SB.MFI.addToSpilledVGPRs(1);
   }
@@ -1381,8 +1393,8 @@
       // Mark the "old value of vgpr" input undef only if this is the first
       // sgpr spill to this specific vgpr in the first basic block.
-      auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
-                         Spill.VGPR)
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
+                         SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
                      .addReg(SubReg, getKillRegState(UseKill))
                      .addImm(Spill.Lane)
                      .addReg(Spill.VGPR);
@@ -1428,7 +1440,7 @@
               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));

       MachineInstrBuilder WriteLane =
-          BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+          BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
                   SB.TmpVGPR)
               .addReg(SubReg, SubKillState)
               .addImm(i % PVD.PerVGPR)
@@ -1490,10 +1502,10 @@
               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
-      auto MIB =
-          BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
-              .addReg(Spill.VGPR)
-              .addImm(Spill.Lane);
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+                         SubReg)
+                     .addReg(Spill.VGPR)
+                     .addImm(Spill.Lane);
       if (SB.NumSubRegs > 1 && i == 0)
         MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
       if (LIS) {
@@ -1524,7 +1536,7 @@
               : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
       bool LastSubReg = (i + 1 == e);
-      auto MIB = BuildMI(SB.MBB, MI, SB.DL,
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
                          SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
                      .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
                      .addImm(i);
@@ -1550,6 +1562,75 @@
   return true;
 }

+bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
+                                        MachineBasicBlock &RestoreMBB,
+                                        Register SGPR, RegScavenger *RS) const {
+  SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
+                      RS);
+  SB.prepare();
+  // Generate the spill of SGPR to SB.TmpVGPR.
+  unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
+  auto PVD = SB.getPerVGPRData();
+  for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+    unsigned TmpVGPRFlags = RegState::Undef;
+    // Write sub registers into the VGPR.
+    for (unsigned i = Offset * PVD.PerVGPR,
+                  e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+         i < e; ++i) {
+      Register SubReg =
+          SB.NumSubRegs == 1
+              ? SB.SuperReg
+              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+
+      MachineInstrBuilder WriteLane =
+          BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+                  SB.TmpVGPR)
+              .addReg(SubReg, SubKillState)
+              .addImm(i % PVD.PerVGPR)
+              .addReg(SB.TmpVGPR, TmpVGPRFlags);
+      TmpVGPRFlags = 0;
+      // There could be undef components of a spilled super register.
+      // TODO: Can we detect this and skip the spill?
+      if (SB.NumSubRegs > 1) {
+        // The last implicit use of the SB.SuperReg carries the "Kill" flag.
+        unsigned SuperKillState = 0;
+        if (i + 1 == SB.NumSubRegs)
+          SuperKillState |= getKillRegState(SB.IsKill);
+        WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
+      }
+    }
+    // The VGPR itself does not need to be written out.
+  }
+
+  // Restore clobbered registers in the specified restore block.
+  MI = RestoreMBB.end();
+  SB.setMI(&RestoreMBB, MI);
+  // Generate the restore of SGPR from SB.TmpVGPR.
+  for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+    // The VGPR does not need to be loaded back in.
+    // Unpack lanes.
+    for (unsigned i = Offset * PVD.PerVGPR,
+                  e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+         i < e; ++i) {
+      Register SubReg =
+          SB.NumSubRegs == 1
+              ? SB.SuperReg
+              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+      bool LastSubReg = (i + 1 == e);
+      auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+                         SubReg)
+                     .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
+                     .addImm(i);
+      if (SB.NumSubRegs > 1 && i == 0)
+        MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
+    }
+  }
+  SB.restore();
+
+  SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
+  return false;
+}
+
 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
 /// a VGPR and the stack slot can be safely eliminated when all other users are
 /// handled.
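spillEmergencySGPR parks each 32-bit sub-register of the SGPR in a lane of the temporary VGPR before the far branch (v_writelane_b32) and unpacks it again in RestoreMBB (v_readlane_b32); the regenerated checks below show exactly this round trip for SGPR0_SGPR1. A standalone model of the lane round trip, with invented names rather than LLVM types:

#include <array>
#include <cstdint>

// A VGPR holds one 32-bit value per lane (64 lanes in wave64). Each half of
// the 64-bit SGPR pair gets its own lane; the VGPR itself never has to be
// written to memory between spill and restore.
struct VGPR {
  std::array<uint32_t, 64> Lane{};
};

static void spill(VGPR &Tmp, uint64_t SGPRPair) {
  Tmp.Lane[0] = uint32_t(SGPRPair);        // v_writelane_b32 v0, s0, 0
  Tmp.Lane[1] = uint32_t(SGPRPair >> 32);  // v_writelane_b32 v0, s1, 1
}

static uint64_t restore(const VGPR &Tmp) {
  return uint64_t(Tmp.Lane[0]) |           // v_readlane_b32 s0, v0, 0
         (uint64_t(Tmp.Lane[1]) << 32);    // v_readlane_b32 s1, v0, 1
}

int main() {
  VGPR Tmp;
  uint64_t SGPR0_SGPR1 = 0x123456789abcdef0ull;
  spill(Tmp, SGPR0_SGPR1);
  // ... the long-branch sequence clobbers SGPR0_SGPR1 here ...
  return restore(Tmp) == SGPR0_SGPR1 ? 0 : 1;
}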
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h
--- a/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -107,10 +107,10 @@
   bool isBranchOffsetInRange(unsigned BranchOpc,
                              int64_t BrOffset) const override;

-  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                MachineBasicBlock &NewDestBB,
-                                const DebugLoc &DL, int64_t BrOffset,
-                                RegScavenger *RS) const override;
+  void insertIndirectBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock &NewDestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset, RegScavenger *RS) const override;

 private:
   const AVRRegisterInfo RI;

diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -560,19 +560,19 @@
   }
 }

-unsigned AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                            MachineBasicBlock &NewDestBB,
-                                            const DebugLoc &DL,
-                                            int64_t BrOffset,
-                                            RegScavenger *RS) const {
+void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                        MachineBasicBlock &NewDestBB,
+                                        MachineBasicBlock &RestoreBB,
+                                        const DebugLoc &DL, int64_t BrOffset,
+                                        RegScavenger *RS) const {
   // This method inserts a *direct* branch (JMP), despite its name.
   // LLVM calls this method to fix up unconditional branches; it never calls
   // insertBranch or some hypothetical "insertDirectBranch".
   // See lib/CodeGen/BranchRelaxation.cpp for details.
   // We end up here when a jump is too long for an RJMP instruction.
-  auto &MI = *BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
+  BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);

-  return getInstSizeInBytes(MI);
+  return;
 }

 } // end of namespace llvm

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -85,10 +85,10 @@
                     const DebugLoc &dl,
                     int *BytesAdded = nullptr) const override;

-  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
-                                MachineBasicBlock &NewDestBB,
-                                const DebugLoc &DL, int64_t BrOffset,
-                                RegScavenger *RS = nullptr) const override;
+  void insertIndirectBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock &NewDestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset, RegScavenger *RS) const override;

   unsigned removeBranch(MachineBasicBlock &MBB,
                         int *BytesRemoved = nullptr) const override;

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -684,11 +684,11 @@
   return 2;
 }

-unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
-                                              MachineBasicBlock &DestBB,
-                                              const DebugLoc &DL,
-                                              int64_t BrOffset,
-                                              RegScavenger *RS) const {
+void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                          MachineBasicBlock &DestBB,
+                                          MachineBasicBlock &RestoreBB,
+                                          const DebugLoc &DL, int64_t BrOffset,
+                                          RegScavenger *RS) const {
   assert(RS && "RegScavenger required for long branching");
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
@@ -714,10 +714,11 @@
   RS->enterBasicBlockEnd(MBB);
   unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
                                                 MI.getIterator(), false, 0);
+  // TODO: The case where no register can be scavenged needs special handling.
+  assert(Scav != RISCV::NoRegister && "No register is scavenged!");
   MRI.replaceRegWith(ScratchReg, Scav);
   MRI.clearVirtRegs();
   RS->setRegUsed(Scav);
-  return 8;
 }

 bool RISCVInstrInfo::reverseBranchCondition(

diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -1,12 +1,1714 @@
-; RUN: not --crash llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s 2>&1 | FileCheck -check-prefix=FAIL %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 -o - %s | FileCheck %s

-; FIXME: This should be able to compile, but requires inserting an
-; extra block to restore the scavenged register.
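The regenerated test that follows drives the new path: -amdgpu-s-branch-bits=5 narrows the signed, dword-scaled immediate of the short branch, the inline asm pins every allocatable SGPR so the scavenger comes up empty, and bb2's 64-byte asm block pushes the branch out of range. The range math, sketched after the shape of SIInstrInfo::isBranchOffsetInRange (the helper below is illustrative, not the actual implementation):

#include <cstdint>

// The SOPP branch immediate is a signed count of dwords. With
// -amdgpu-s-branch-bits=5 the reach is [-16, 15] dwords, i.e. at most 60
// bytes forward, so branching over the 68-byte bb2 below is out of range
// and triggers the relaxation exercised by this test.
static bool branchOffsetInRange(unsigned Bits, int64_t ByteOffset) {
  int64_t Dwords = ByteOffset / 4;
  int64_t Min = -(int64_t(1) << (Bits - 1));    // -16 for 5 bits
  int64_t Max = (int64_t(1) << (Bits - 1)) - 1; // 15 for 5 bits
  return Dwords >= Min && Dwords <= Max;
}

int main() {
  return branchOffsetInRange(5, 68) ? 1 : 0; // 17 dwords: out of range
}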
+define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 { +; CHECK-LABEL: spill: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dword s27, s[4:5], 0x2 +; CHECK-NEXT: s_mov_b64 s[98:99], s[2:3] +; CHECK-NEXT: s_mov_b64 s[96:97], s[0:1] +; CHECK-NEXT: s_add_u32 s96, s96, s7 +; CHECK-NEXT: s_addc_u32 s97, s97, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_cmp_eq_u32 s27, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s2, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s3, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s7, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s8, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s9, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s10, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s11, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s12, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s13, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s14, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s15, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s16, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s17, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s18, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s19, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s20, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s21, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s22, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s23, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s24, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s25, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s26, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s27, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s28, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s29, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s30, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s31, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s32, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s33, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s34, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s35, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s36, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s37, 
0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s38, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s39, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s40, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s41, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s42, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s43, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s44, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s45, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s46, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s47, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s48, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s49, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s50, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s51, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s52, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s53, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s54, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s55, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s56, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s57, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s58, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s59, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s60, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s61, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s62, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s63, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s64, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s65, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s66, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s67, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s68, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s69, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s70, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s71, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s72, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s73, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s74, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s75, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s76, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s77, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s78, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s79, 0 +; CHECK-NEXT: ;;#ASMEND 
+; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s80, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s81, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s82, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s83, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s84, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s85, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s86, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s87, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s88, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s89, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s90, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s91, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s92, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s93, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s94, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s95, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s96, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s97, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s98, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s99, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s100, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s101, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 vcc_lo, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 vcc_hi, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cbranch_scc0 BB0_1 +; CHECK-NEXT: BB0_3: ; %entry +; CHECK-NEXT: s_not_b64 exec, exec +; CHECK-NEXT: buffer_store_dword v0, off, s[96:99], 0 +; CHECK-NEXT: v_writelane_b32 v0, s0, 0 +; CHECK-NEXT: v_writelane_b32 v0, s1, 1 +; CHECK-NEXT: s_getpc_b64 s[0:1] +; CHECK-NEXT: .Lpost_getpc0: +; CHECK-NEXT: s_add_u32 s0, s0, (BB0_4-.Lpost_getpc0)&4294967295 +; CHECK-NEXT: s_addc_u32 s1, s1, (BB0_4-.Lpost_getpc0)>>32 +; CHECK-NEXT: s_setpc_b64 s[0:1] +; CHECK-NEXT: BB0_1: ; %bb2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_branch BB0_2 +; CHECK-NEXT: BB0_4: ; %bb3 +; CHECK-NEXT: v_readlane_b32 s0, v0, 0 +; CHECK-NEXT: v_readlane_b32 s1, v0, 1 +; CHECK-NEXT: buffer_load_dword v0, off, s[96:99], 0 +; CHECK-NEXT: s_not_b64 exec, exec +; CHECK-NEXT: BB0_2: ; %bb3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s2 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s3 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s4 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s5 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg 
use s6 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s7 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s8 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s9 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s10 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s11 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s12 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s13 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s14 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s15 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s16 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s17 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s18 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s19 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s20 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s21 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s22 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s23 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s24 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s25 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s26 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s27 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s28 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s29 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s30 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s31 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s32 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s33 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s34 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s35 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s36 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s37 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s38 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s39 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s40 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s41 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s42 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s43 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s44 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s45 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s46 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s47 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s48 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s49 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: 
; reg use s50 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s51 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s52 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s53 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s54 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s55 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s56 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s57 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s58 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s59 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s60 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s61 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s62 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s63 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s64 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s65 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s66 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s67 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s68 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s69 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s70 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s71 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s72 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s73 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s74 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s75 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s76 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s77 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s78 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s79 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s80 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s81 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s82 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s83 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s84 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s85 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s86 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s87 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s88 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s89 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s90 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s91 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s92 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s93 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; 
CHECK-NEXT: ; reg use s94 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s95 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s96 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s97 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s98 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s99 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s100 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s101 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use vcc_lo +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use vcc_hi +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm +entry: + %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 + %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0 + %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0 + %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0 + %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0 + %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0 + %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0 + %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0 + %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0 + %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0 + %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0 + %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0 + %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0 + %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0 + %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0 + %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0 + %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0 + %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0 + %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0 + %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0 + %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0 + %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0 + %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0 + %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0 + %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0 + %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0 + %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0 + %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0 + %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0 + %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0 + %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0 + %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0 + %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0 + %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0 + %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0 + %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0 + %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", 
"={s36}"() #0 + %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0 + %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0 + %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0 + %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0 + %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0 + %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0 + %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0 + %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0 + %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0 + %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0 + %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0 + %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0 + %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0 + %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0 + %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0 + %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0 + %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0 + %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0 + %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0 + %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0 + %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0 + %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0 + %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0 + %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0 + %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0 + %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0 + %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0 + %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0 + %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0 + %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0 + %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0 + %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0 + %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0 + %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0 + %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0 + %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0 + %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0 + %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0 + %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0 + %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0 + %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0 + %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0 + %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0 + %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0 + %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0 + %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0 + %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0 + %sgpr84 = 
tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0 + %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0 + %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0 + %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0 + %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0 + %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0 + %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0 + %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0 + %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0 + %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0 + %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0 + %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0 + %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0 + %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0 + %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0 + %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0 + %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0 + %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0 + %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_lo}"() #0 + %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_hi}"() #0 + %cmp = icmp eq i32 %cnd, 0 + br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch + +bb2: ; 68 bytes + ; 64 byte asm + call void asm sideeffect + "v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64",""() #0 + br label %bb3 -; FAIL: LLVM ERROR: Error while trying to spill SGPR0_SGPR1 from class SReg_64: Cannot scavenge register without an emergency spill slot! 
+bb3: + tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0 + tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0 + tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0 + tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0 + tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0 + tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0 + tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0 + tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0 + tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0 + tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0 + tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0 + tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0 + tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0 + tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0 + tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0 + tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0 + tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0 + tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0 + tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0 + tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0 + tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0 + tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0 + tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0 + tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0 + tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0 + tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0 + tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0 + tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0 + tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0 + tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0 + tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0 + tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0 + tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0 + tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0 + tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0 + tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0 + tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0 + tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0 + tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0 + tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0 + tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0 + tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0 + tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0 + tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0 + tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0 + tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0 + tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0 + tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0 + tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0 + tail call void asm sideeffect "; reg 
use $0", "{s49}"(i32 %sgpr49) #0 + tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0 + tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0 + tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0 + tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0 + tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0 + tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0 + tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0 + tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0 + tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0 + tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0 + tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0 + tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0 + tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0 + tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0 + tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0 + tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0 + tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0 + tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0 + tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0 + tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0 + tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0 + tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0 + tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0 + tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0 + tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0 + tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0 + tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0 + tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0 + tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0 + tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0 + tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0 + tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0 + tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0 + tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0 + tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0 + tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0 + tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0 + tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0 + tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0 + tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0 + tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0 + tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0 + tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0 + tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0 + tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0 + tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0 + tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0 + tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0 + tail call void asm sideeffect "; reg use $0", "{s98}"(i32 
%sgpr98) #0 + tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0 + tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0 + tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0 + tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0 + tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0 + ret void +} -define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 { +define void @spill_func(i32 addrspace(1)* %arg) #0 { +; CHECK-LABEL: spill_func: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: s_waitcnt expcnt(1) +; CHECK-NEXT: v_writelane_b32 v0, s33, 0 +; CHECK-NEXT: v_writelane_b32 v0, s34, 1 +; CHECK-NEXT: v_writelane_b32 v0, s35, 2 +; CHECK-NEXT: v_writelane_b32 v0, s36, 3 +; CHECK-NEXT: v_writelane_b32 v0, s37, 4 +; CHECK-NEXT: v_writelane_b32 v0, s38, 5 +; CHECK-NEXT: v_writelane_b32 v0, s39, 6 +; CHECK-NEXT: v_writelane_b32 v0, s40, 7 +; CHECK-NEXT: v_writelane_b32 v0, s41, 8 +; CHECK-NEXT: v_writelane_b32 v0, s42, 9 +; CHECK-NEXT: v_writelane_b32 v0, s43, 10 +; CHECK-NEXT: v_writelane_b32 v0, s44, 11 +; CHECK-NEXT: v_writelane_b32 v0, s45, 12 +; CHECK-NEXT: v_writelane_b32 v0, s46, 13 +; CHECK-NEXT: v_writelane_b32 v0, s47, 14 +; CHECK-NEXT: v_writelane_b32 v0, s48, 15 +; CHECK-NEXT: v_writelane_b32 v0, s49, 16 +; CHECK-NEXT: v_writelane_b32 v0, s50, 17 +; CHECK-NEXT: v_writelane_b32 v0, s51, 18 +; CHECK-NEXT: v_writelane_b32 v0, s52, 19 +; CHECK-NEXT: v_writelane_b32 v0, s53, 20 +; CHECK-NEXT: v_writelane_b32 v0, s54, 21 +; CHECK-NEXT: v_writelane_b32 v0, s55, 22 +; CHECK-NEXT: v_writelane_b32 v0, s56, 23 +; CHECK-NEXT: v_writelane_b32 v0, s57, 24 +; CHECK-NEXT: v_writelane_b32 v0, s58, 25 +; CHECK-NEXT: v_writelane_b32 v0, s59, 26 +; CHECK-NEXT: v_writelane_b32 v0, s60, 27 +; CHECK-NEXT: v_writelane_b32 v0, s61, 28 +; CHECK-NEXT: v_writelane_b32 v0, s62, 29 +; CHECK-NEXT: v_writelane_b32 v0, s63, 30 +; CHECK-NEXT: v_writelane_b32 v0, s64, 31 +; CHECK-NEXT: v_writelane_b32 v0, s65, 32 +; CHECK-NEXT: v_writelane_b32 v0, s66, 33 +; CHECK-NEXT: v_writelane_b32 v0, s67, 34 +; CHECK-NEXT: v_writelane_b32 v0, s68, 35 +; CHECK-NEXT: v_writelane_b32 v0, s69, 36 +; CHECK-NEXT: v_writelane_b32 v0, s70, 37 +; CHECK-NEXT: v_writelane_b32 v0, s71, 38 +; CHECK-NEXT: v_writelane_b32 v0, s72, 39 +; CHECK-NEXT: v_writelane_b32 v0, s73, 40 +; CHECK-NEXT: v_writelane_b32 v0, s74, 41 +; CHECK-NEXT: v_writelane_b32 v0, s75, 42 +; CHECK-NEXT: v_writelane_b32 v0, s76, 43 +; CHECK-NEXT: v_writelane_b32 v0, s77, 44 +; CHECK-NEXT: v_writelane_b32 v0, s78, 45 +; CHECK-NEXT: v_writelane_b32 v0, s79, 46 +; CHECK-NEXT: v_writelane_b32 v0, s80, 47 +; CHECK-NEXT: v_writelane_b32 v0, s81, 48 +; CHECK-NEXT: v_writelane_b32 v0, s82, 49 +; CHECK-NEXT: v_writelane_b32 v0, s83, 50 +; CHECK-NEXT: v_writelane_b32 v0, s84, 51 +; CHECK-NEXT: v_writelane_b32 v0, s85, 52 +; CHECK-NEXT: v_writelane_b32 v0, s86, 53 +; CHECK-NEXT: v_writelane_b32 v0, s87, 54 +; CHECK-NEXT: v_writelane_b32 v0, s88, 55 +; CHECK-NEXT: v_writelane_b32 v0, s89, 56 +; CHECK-NEXT: v_writelane_b32 v0, s90, 57 +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: v_writelane_b32 v1, s97, 0 +; CHECK-NEXT: v_writelane_b32 v0, s91, 58 +; CHECK-NEXT: v_writelane_b32 v1, 
s98, 1 +; CHECK-NEXT: v_writelane_b32 v0, s92, 59 +; CHECK-NEXT: v_writelane_b32 v1, s99, 2 +; CHECK-NEXT: v_writelane_b32 v0, s93, 60 +; CHECK-NEXT: v_writelane_b32 v1, s100, 3 +; CHECK-NEXT: v_writelane_b32 v0, s94, 61 +; CHECK-NEXT: v_writelane_b32 v1, s101, 4 +; CHECK-NEXT: v_writelane_b32 v0, s95, 62 +; CHECK-NEXT: v_writelane_b32 v1, s30, 5 +; CHECK-NEXT: s_mov_b32 s29, s4 +; CHECK-NEXT: v_writelane_b32 v0, s96, 63 +; CHECK-NEXT: v_writelane_b32 v1, s31, 6 +; CHECK-NEXT: s_cmp_eq_u32 s29, 0 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s2, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s3, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s7, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s8, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s9, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s10, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s11, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s12, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s13, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s14, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s15, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s16, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s17, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s18, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s19, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s20, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s21, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s22, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s23, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s24, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s25, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s26, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s27, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s28, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s29, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s30, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s31, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s32, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s33, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s34, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s35, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; 
CHECK-NEXT: s_mov_b32 s36, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s37, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s38, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s39, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s40, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s41, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s42, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s43, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s44, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s45, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s46, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s47, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s48, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s49, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s50, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s51, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s52, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s53, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s54, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s55, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s56, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s57, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s58, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s59, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s60, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s61, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s62, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s63, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s64, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s65, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s66, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s67, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s68, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s69, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s70, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s71, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s72, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s73, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s74, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s75, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s76, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s77, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s78, 
0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s79, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s80, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s81, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s82, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s83, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s84, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s85, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s86, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s87, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s88, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s89, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s90, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s91, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s92, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s93, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s94, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s95, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s96, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s97, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s98, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s99, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s100, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s101, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 vcc_lo, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 vcc_hi, 0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_cbranch_scc0 BB1_1 +; CHECK-NEXT: BB1_3: ; %entry +; CHECK-NEXT: s_not_b64 exec, exec +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 +; CHECK-NEXT: v_writelane_b32 v2, s0, 0 +; CHECK-NEXT: v_writelane_b32 v2, s1, 1 +; CHECK-NEXT: s_getpc_b64 s[0:1] +; CHECK-NEXT: .Lpost_getpc1: +; CHECK-NEXT: s_add_u32 s0, s0, (BB1_4-.Lpost_getpc1)&4294967295 +; CHECK-NEXT: s_addc_u32 s1, s1, (BB1_4-.Lpost_getpc1)>>32 +; CHECK-NEXT: s_setpc_b64 s[0:1] +; CHECK-NEXT: BB1_1: ; %bb2 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: v_nop_e64 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_branch BB1_2 +; CHECK-NEXT: BB1_4: ; %bb3 +; CHECK-NEXT: v_readlane_b32 s0, v2, 0 +; CHECK-NEXT: v_readlane_b32 s1, v2, 1 +; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 +; CHECK-NEXT: s_not_b64 exec, exec +; CHECK-NEXT: BB1_2: ; %bb3 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s0 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s1 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s2 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s3 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s4 +; CHECK-NEXT: 
;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s5 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s4, v1, 5 +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s6 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s7 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s8 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s9 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s10 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s11 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s12 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s13 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s14 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s15 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s16 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s17 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s18 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s19 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s20 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s21 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s22 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s23 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s24 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s25 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s26 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s27 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s28 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s29 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s30 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s31 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s32 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s33 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s34 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s35 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s36 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s37 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s38 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s39 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s40 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s41 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s42 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s43 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s44 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s45 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s46 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s47 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART 
+; CHECK-NEXT: ; reg use s48 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s49 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s50 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s51 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s52 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s53 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s54 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s55 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s56 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s57 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s58 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s59 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s60 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s61 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s62 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s63 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s64 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s65 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s66 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s67 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s68 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s69 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s70 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s71 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s72 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s73 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s74 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s75 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s76 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s77 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s78 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s79 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s80 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s81 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s82 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s83 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s84 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s85 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s86 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s87 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s88 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s89 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s90 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s91 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: 
;;#ASMSTART +; CHECK-NEXT: ; reg use s92 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s93 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s94 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s95 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s96 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s97 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s98 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s99 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s100 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use s101 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use vcc_lo +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ; reg use vcc_hi +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s5, v1, 6 +; CHECK-NEXT: v_readlane_b32 s101, v1, 4 +; CHECK-NEXT: v_readlane_b32 s100, v1, 3 +; CHECK-NEXT: v_readlane_b32 s99, v1, 2 +; CHECK-NEXT: v_readlane_b32 s98, v1, 1 +; CHECK-NEXT: v_readlane_b32 s97, v1, 0 +; CHECK-NEXT: v_readlane_b32 s96, v0, 63 +; CHECK-NEXT: v_readlane_b32 s95, v0, 62 +; CHECK-NEXT: v_readlane_b32 s94, v0, 61 +; CHECK-NEXT: v_readlane_b32 s93, v0, 60 +; CHECK-NEXT: v_readlane_b32 s92, v0, 59 +; CHECK-NEXT: v_readlane_b32 s91, v0, 58 +; CHECK-NEXT: v_readlane_b32 s90, v0, 57 +; CHECK-NEXT: v_readlane_b32 s89, v0, 56 +; CHECK-NEXT: v_readlane_b32 s88, v0, 55 +; CHECK-NEXT: v_readlane_b32 s87, v0, 54 +; CHECK-NEXT: v_readlane_b32 s86, v0, 53 +; CHECK-NEXT: v_readlane_b32 s85, v0, 52 +; CHECK-NEXT: v_readlane_b32 s84, v0, 51 +; CHECK-NEXT: v_readlane_b32 s83, v0, 50 +; CHECK-NEXT: v_readlane_b32 s82, v0, 49 +; CHECK-NEXT: v_readlane_b32 s81, v0, 48 +; CHECK-NEXT: v_readlane_b32 s80, v0, 47 +; CHECK-NEXT: v_readlane_b32 s79, v0, 46 +; CHECK-NEXT: v_readlane_b32 s78, v0, 45 +; CHECK-NEXT: v_readlane_b32 s77, v0, 44 +; CHECK-NEXT: v_readlane_b32 s76, v0, 43 +; CHECK-NEXT: v_readlane_b32 s75, v0, 42 +; CHECK-NEXT: v_readlane_b32 s74, v0, 41 +; CHECK-NEXT: v_readlane_b32 s73, v0, 40 +; CHECK-NEXT: v_readlane_b32 s72, v0, 39 +; CHECK-NEXT: v_readlane_b32 s71, v0, 38 +; CHECK-NEXT: v_readlane_b32 s70, v0, 37 +; CHECK-NEXT: v_readlane_b32 s69, v0, 36 +; CHECK-NEXT: v_readlane_b32 s68, v0, 35 +; CHECK-NEXT: v_readlane_b32 s67, v0, 34 +; CHECK-NEXT: v_readlane_b32 s66, v0, 33 +; CHECK-NEXT: v_readlane_b32 s65, v0, 32 +; CHECK-NEXT: v_readlane_b32 s64, v0, 31 +; CHECK-NEXT: v_readlane_b32 s63, v0, 30 +; CHECK-NEXT: v_readlane_b32 s62, v0, 29 +; CHECK-NEXT: v_readlane_b32 s61, v0, 28 +; CHECK-NEXT: v_readlane_b32 s60, v0, 27 +; CHECK-NEXT: v_readlane_b32 s59, v0, 26 +; CHECK-NEXT: v_readlane_b32 s58, v0, 25 +; CHECK-NEXT: v_readlane_b32 s57, v0, 24 +; CHECK-NEXT: v_readlane_b32 s56, v0, 23 +; CHECK-NEXT: v_readlane_b32 s55, v0, 22 +; CHECK-NEXT: v_readlane_b32 s54, v0, 21 +; CHECK-NEXT: v_readlane_b32 s53, v0, 20 +; CHECK-NEXT: v_readlane_b32 s52, v0, 19 +; CHECK-NEXT: v_readlane_b32 s51, v0, 18 +; CHECK-NEXT: v_readlane_b32 s50, v0, 17 +; CHECK-NEXT: v_readlane_b32 s49, v0, 16 +; CHECK-NEXT: v_readlane_b32 s48, v0, 15 +; CHECK-NEXT: v_readlane_b32 s47, v0, 14 +; CHECK-NEXT: v_readlane_b32 s46, v0, 13 +; CHECK-NEXT: v_readlane_b32 s45, v0, 12 +; CHECK-NEXT: v_readlane_b32 s44, v0, 11 +; CHECK-NEXT: v_readlane_b32 s43, v0, 10 +; CHECK-NEXT: v_readlane_b32 s42, v0, 9 +; CHECK-NEXT: v_readlane_b32 
s41, v0, 8 +; CHECK-NEXT: v_readlane_b32 s40, v0, 7 +; CHECK-NEXT: v_readlane_b32 s39, v0, 6 +; CHECK-NEXT: v_readlane_b32 s38, v0, 5 +; CHECK-NEXT: v_readlane_b32 s37, v0, 4 +; CHECK-NEXT: v_readlane_b32 s36, v0, 3 +; CHECK-NEXT: v_readlane_b32 s35, v0, 2 +; CHECK-NEXT: v_readlane_b32 s34, v0, 1 +; CHECK-NEXT: v_readlane_b32 s33, v0, 0 +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[4:5] entry: + %cnd = tail call i32 @llvm.amdgcn.workgroup.id.x() #0 %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0 %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0 @@ -114,10 +1816,14 @@ %cmp = icmp eq i32 %cnd, 0 br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch -bb2: ; 28 bytes - ; 24 byte asm +bb2: ; 68 bytes + ; 64 byte asm call void asm sideeffect "v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64 v_nop_e64 v_nop_e64 v_nop_e64",""() #0 @@ -231,4 +1937,6 @@ ret void } +declare i32 @llvm.amdgcn.workgroup.id.x() #0 + attributes #0 = { nounwind }