diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp --- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp +++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp @@ -25,10 +25,12 @@ std::unique_ptr &Inst, const MCInst &MCI) { switch (MCI.getOpcode()) { case AMDGPU::S_WAITCNT: + case AMDGPU::S_WAITCNT_soft: case AMDGPU::S_WAITCNT_EXPCNT: case AMDGPU::S_WAITCNT_LGKMCNT: case AMDGPU::S_WAITCNT_VMCNT: case AMDGPU::S_WAITCNT_VSCNT: + case AMDGPU::S_WAITCNT_VSCNT_soft: case AMDGPU::S_WAITCNT_EXPCNT_gfx10: case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: case AMDGPU::S_WAITCNT_VMCNT_gfx10: @@ -77,10 +79,12 @@ default: return 0; case AMDGPU::S_WAITCNT: // This instruction + case AMDGPU::S_WAITCNT_soft: case AMDGPU::S_WAITCNT_EXPCNT: case AMDGPU::S_WAITCNT_LGKMCNT: case AMDGPU::S_WAITCNT_VMCNT: - case AMDGPU::S_WAITCNT_VSCNT: // to this instruction are all pseudo. + case AMDGPU::S_WAITCNT_VSCNT: + case AMDGPU::S_WAITCNT_VSCNT_soft: // to this instruction are all pseudo. case AMDGPU::S_WAITCNT_EXPCNT_gfx10: case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: case AMDGPU::S_WAITCNT_VMCNT_gfx10: diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -292,6 +292,13 @@ VgprVmemTypes[GprNo] = 0; } + void setNonKernelFunctionInitialState() { + for (InstCounterType Counter : inst_counter_types()) { + setScoreUB(Counter, getWaitCountMax(Counter)); + PendingEvents |= WaitEventMaskForInst[Counter]; + } + } + void print(raw_ostream &); void dump() { print(dbgs()); } @@ -364,7 +371,6 @@ const MachineRegisterInfo *MRI = nullptr; AMDGPU::IsaVersion IV; - DenseSet TrackedWaitcntSet; DenseMap SLoadAddresses; DenseMap PreheadersToFlush; MachineLoopInfo *MLI; @@ -477,7 +483,7 @@ bool generateWaitcnt(AMDGPU::Waitcnt Wait, MachineBasicBlock::instr_iterator It, MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets, - MachineInstr *OldWaitcntInstr); + MachineInstr *OldWaitcntInstr) const; void updateEventWaitcntAfter(MachineInstr &Inst, WaitcntBrackets *ScoreBrackets); bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block, @@ -486,6 +492,7 @@ MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait, MachineBasicBlock::instr_iterator It) const; + bool updateWaitcntIfSoft(MachineInstr *&Waitcnt) const; }; } // end anonymous namespace @@ -865,6 +872,26 @@ return true; } +bool SIInsertWaitcnts::updateWaitcntIfSoft(MachineInstr *&Waitcnt) const { + unsigned Opcode = Waitcnt->getOpcode(); + if (!SIInstrInfo::isSoftWaitcnt(Opcode)) + return false; + + MachineBasicBlock &Block = *Waitcnt->getParent(); + auto InsertBefore = Waitcnt->getIterator(); + auto DL = Waitcnt->getDebugLoc(); + auto &MCInfo = TII->get(SIInstrInfo::getNonSoftWaitcntOpcode(Opcode)); + + MachineInstrBuilder StrongWaitcnt = BuildMI(Block, InsertBefore, DL, MCInfo); + for (auto &Operand : Waitcnt->operands()) + StrongWaitcnt->addOperand(Operand); + + Waitcnt->eraseFromParent(); + Waitcnt = StrongWaitcnt; + + return true; +} + /// Combine consecutive waitcnt instructions that precede \p It and follow /// \p OldWaitcntInstr and apply any extra wait from waitcnt that were added /// by previous passes. Currently this pass conservatively assumes that these @@ -881,18 +908,22 @@ if (II.isMetaInstruction()) continue; - if (II.getOpcode() == AMDGPU::S_WAITCNT) { + unsigned Opcode = II.getOpcode(); + bool CanFullyDiscardWaitcntSequence = SIInstrInfo::isSoftWaitcnt(Opcode); + + if (SIInstrInfo::isWaitcnt(Opcode)) { // Conservatively update required wait if this waitcnt was added in an // earlier pass. In this case it will not exist in the tracked waitcnt // set. - if (!TrackedWaitcntSet.count(&II)) { - unsigned IEnc = II.getOperand(0).getImm(); - AMDGPU::Waitcnt OldWait = AMDGPU::decodeWaitcnt(IV, IEnc); - Wait = Wait.combined(OldWait); - } + unsigned IEnc = II.getOperand(0).getImm(); + AMDGPU::Waitcnt OldWait = AMDGPU::decodeWaitcnt(IV, IEnc); + if (CanFullyDiscardWaitcntSequence) + ScoreBrackets.simplifyWaitcnt(OldWait); + Wait = Wait.combined(OldWait); // Merge consecutive waitcnt of the same type by erasing multiples. - if (!WaitcntInstr) { + if (!WaitcntInstr && + (Wait.hasWaitExceptVsCnt() || !CanFullyDiscardWaitcntSequence)) { WaitcntInstr = &II; } else { II.eraseFromParent(); @@ -900,15 +931,17 @@ } } else { - assert(II.getOpcode() == AMDGPU::S_WAITCNT_VSCNT); + assert(SIInstrInfo::isWaitcntVsCnt(Opcode)); assert(II.getOperand(0).getReg() == AMDGPU::SGPR_NULL); - if (!TrackedWaitcntSet.count(&II)) { - unsigned OldVSCnt = - TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm(); - Wait.VsCnt = std::min(Wait.VsCnt, OldVSCnt); - } - if (!WaitcntVsCntInstr) { + unsigned OldVSCnt = + TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm(); + if (CanFullyDiscardWaitcntSequence) + ScoreBrackets.simplifyWaitcnt(InstCounterType::VS_CNT, OldVSCnt); + Wait.VsCnt = std::min(Wait.VsCnt, OldVSCnt); + + if (!WaitcntVsCntInstr && + (Wait.hasWaitVsCnt() || !CanFullyDiscardWaitcntSequence)) { WaitcntVsCntInstr = &II; } else { II.eraseFromParent(); @@ -919,48 +952,38 @@ // Updated encoding of merged waitcnt with the required wait. if (WaitcntInstr) { - if (Wait.hasWaitExceptVsCnt()) { - Modified |= - updateOperandIfDifferent(*WaitcntInstr, AMDGPU::OpName::simm16, - AMDGPU::encodeWaitcnt(IV, Wait)); - ScoreBrackets.applyWaitcnt(Wait); - Wait.VmCnt = ~0u; - Wait.LgkmCnt = ~0u; - Wait.ExpCnt = ~0u; - - LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() - ? dbgs() << "applyPreexistingWaitcnt\n" - << "New Instr at block end: " << *WaitcntInstr - << '\n' - : dbgs() << "applyPreexistingWaitcnt\n" - << "Old Instr: " << *It - << "New Instr: " << *WaitcntInstr << '\n'); + Modified |= updateOperandIfDifferent(*WaitcntInstr, AMDGPU::OpName::simm16, + AMDGPU::encodeWaitcnt(IV, Wait)); + Modified |= updateWaitcntIfSoft(WaitcntInstr); - } else { - WaitcntInstr->eraseFromParent(); - Modified = true; - } + ScoreBrackets.applyWaitcnt(Wait); + Wait.VmCnt = ~0u; + Wait.LgkmCnt = ~0u; + Wait.ExpCnt = ~0u; + + LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() + ? dbgs() + << "applyPreexistingWaitcnt\n" + << "New Instr at block end: " << *WaitcntInstr << '\n' + : dbgs() << "applyPreexistingWaitcnt\n" + << "Old Instr: " << *It + << "New Instr: " << *WaitcntInstr << '\n'); } if (WaitcntVsCntInstr) { - if (Wait.hasWaitVsCnt()) { - assert(ST->hasVscnt()); - Modified |= updateOperandIfDifferent(*WaitcntVsCntInstr, - AMDGPU::OpName::simm16, Wait.VsCnt); - ScoreBrackets.applyWaitcnt(Wait); - Wait.VsCnt = ~0u; - - LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() - ? dbgs() << "applyPreexistingWaitcnt\n" - << "New Instr at block end: " - << *WaitcntVsCntInstr << '\n' - : dbgs() << "applyPreexistingWaitcnt\n" - << "Old Instr: " << *It - << "New Instr: " << *WaitcntVsCntInstr << '\n'); - } else { - WaitcntVsCntInstr->eraseFromParent(); - Modified = true; - } + Modified |= updateOperandIfDifferent(*WaitcntVsCntInstr, + AMDGPU::OpName::simm16, Wait.VsCnt); + Modified |= updateWaitcntIfSoft(WaitcntVsCntInstr); + ScoreBrackets.applyWaitcnt(Wait); + Wait.VsCnt = ~0u; + + LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() + ? dbgs() << "applyPreexistingWaitcnt\n" + << "New Instr at block end: " << *WaitcntVsCntInstr + << '\n' + : dbgs() << "applyPreexistingWaitcnt\n" + << "Old Instr: " << *It + << "New Instr: " << *WaitcntVsCntInstr << '\n'); } return Modified; @@ -1279,7 +1302,7 @@ MachineBasicBlock::instr_iterator It, MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets, - MachineInstr *OldWaitcntInstr) { + MachineInstr *OldWaitcntInstr) const { bool Modified = false; const DebugLoc &DL = Block.findDebugLoc(It); @@ -1312,7 +1335,6 @@ unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait); auto SWaitInst = BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Enc); - TrackedWaitcntSet.insert(SWaitInst); Modified = true; LLVM_DEBUG(dbgs() << "generateWaitcnt\n"; @@ -1326,7 +1348,6 @@ auto SWaitInst = BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT)) .addReg(AMDGPU::SGPR_NULL, RegState::Undef) .addImm(Wait.VsCnt); - TrackedWaitcntSet.insert(SWaitInst); Modified = true; LLVM_DEBUG(dbgs() << "generateWaitcnt\n"; @@ -1569,9 +1590,9 @@ } static bool isWaitInstr(MachineInstr &Inst) { - return Inst.getOpcode() == AMDGPU::S_WAITCNT || - (Inst.getOpcode() == AMDGPU::S_WAITCNT_VSCNT && - Inst.getOperand(0).isReg() && + auto Opcode = Inst.getOpcode(); + return SIInstrInfo::isWaitcnt(Opcode) || + (SIInstrInfo::isWaitcntVsCnt(Opcode) && Inst.getOperand(0).isReg() && Inst.getOperand(0).getReg() == AMDGPU::SGPR_NULL); } @@ -1716,26 +1737,25 @@ // which we want to flush the vmcnt counter, and false otherwise. bool SIInsertWaitcnts::isPreheaderToFlush(MachineBasicBlock &MBB, WaitcntBrackets &ScoreBrackets) { - if (PreheadersToFlush.count(&MBB)) - return PreheadersToFlush[&MBB]; - - auto UpdateCache = [&](bool val) { - PreheadersToFlush[&MBB] = val; - return val; - }; + auto [Iterator, IsInserted] = PreheadersToFlush.try_emplace(&MBB, false); + if (!IsInserted) + return Iterator->second; MachineBasicBlock *Succ = MBB.getSingleSuccessor(); if (!Succ) - return UpdateCache(false); + return false; MachineLoop *Loop = MLI->getLoopFor(Succ); if (!Loop) - return UpdateCache(false); + return false; - if (Loop->getLoopPreheader() == &MBB && shouldFlushVmCnt(Loop, ScoreBrackets)) - return UpdateCache(true); + if (Loop->getLoopPreheader() == &MBB && + shouldFlushVmCnt(Loop, ScoreBrackets)) { + Iterator->second = true; + return true; + } - return UpdateCache(false); + return false; } bool SIInsertWaitcnts::isVMEMOrFlatVMEM(const MachineInstr &MI) const { @@ -1839,7 +1859,6 @@ Encoding.SGPR0 = TRI->getEncodingValue(AMDGPU::SGPR0); Encoding.SGPRL = Encoding.SGPR0 + NumSGPRsMax - 1; - TrackedWaitcntSet.clear(); BlockInfos.clear(); bool Modified = false; @@ -1857,6 +1876,11 @@ ; BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0); + auto NonKernelInitialState = + std::make_unique(ST, Limits, Encoding); + NonKernelInitialState->setNonKernelFunctionInitialState(); + BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState); + Modified = true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -842,6 +842,31 @@ return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead; } + static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) { + if (isWaitcnt(Opcode)) + return AMDGPU::S_WAITCNT; + + if (isWaitcntVsCnt(Opcode)) + return AMDGPU::S_WAITCNT_VSCNT; + + llvm_unreachable("Expected opcode S_WAITCNT/S_WAITCNT_VSCNT"); + } + + static bool isWaitcnt(unsigned Opcode) { + return Opcode == AMDGPU::S_WAITCNT || Opcode == AMDGPU::S_WAITCNT_soft; + } + + static bool isWaitcntVsCnt(unsigned Opcode) { + return Opcode == AMDGPU::S_WAITCNT_VSCNT || + Opcode == AMDGPU::S_WAITCNT_VSCNT_soft; + } + + // soft waitcnt instructions can be relaxed/optimized out by SIInsertWaitcnts + static bool isSoftWaitcnt(unsigned Opcode) { + return Opcode == AMDGPU::S_WAITCNT_soft || + Opcode == AMDGPU::S_WAITCNT_VSCNT_soft; + } + bool isVGPRCopy(const MachineInstr &MI) const { assert(isCopyInstr(MI)); Register Dest = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -8289,6 +8289,11 @@ } int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { + + // FIXME: move to the right place + if (SIInstrInfo::isSoftWaitcnt(Opcode)) + Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(Opcode); + unsigned Gen = subtargetEncodingFamily(ST); if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -1055,7 +1055,8 @@ VMCnt ? 0 : getVmcntBitMask(IV), getExpcntBitMask(IV), LGKMCnt ? 0 : getLgkmcntBitMask(IV)); - BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft)) + .addImm(WaitCntImmediate); Changed = true; } @@ -1963,14 +1964,15 @@ VMCnt ? 0 : getVmcntBitMask(IV), getExpcntBitMask(IV), LGKMCnt ? 0 : getLgkmcntBitMask(IV)); - BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft)) + .addImm(WaitCntImmediate); Changed = true; } if (VSCnt) { - BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT)) - .addReg(AMDGPU::SGPR_NULL, RegState::Undef) - .addImm(0); + BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft)) + .addReg(AMDGPU::SGPR_NULL, RegState::Undef) + .addImm(0); Changed = true; } diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1272,9 +1272,18 @@ let mayStore = 1; } -let hasSideEffects = 1 in def S_WAITCNT : SOPP_Pseudo <"s_waitcnt" , (ins SWaitCnt:$simm16), "$simm16", [(int_amdgcn_s_waitcnt timm:$simm16)]>; + +// "_soft" waitcnts are waitcnts whose wait can be relaxed or completely removed. +// These are inserted by to resolve memory dependencies by the memory legalizer and later optimized by SIInsertWaitcnts +// For example, a S_WAITCNT_soft 0 can be completely removed on a function that doesn't access memory. +def S_WAITCNT_soft : SOPP_Pseudo <"s_soft_waitcnt" , (ins SWaitCnt:$simm16), "$simm16">; +def S_WAITCNT_VSCNT_soft : SOPP_Pseudo<"s_soft_waitcnt_vscnt", (ins SReg_32:$sdst, s16imm:$simm16), "$sdst, $simm16"> { + let mayLoad = 1; + let mayStore = 1; + let has_sdst = 1; +} def S_SETHALT : SOPP_Pseudo <"s_sethalt" , (ins i32imm:$simm16), "$simm16", [(int_amdgcn_s_sethalt timm:$simm16)]>; def S_SETKILL : SOPP_Pseudo <"s_setkill" , (ins i16imm:$simm16), "$simm16">; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll @@ -22,7 +22,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s2 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_dec_rtn_u32 v2, v1, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 @@ -38,7 +37,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s2 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_dec_rtn_u32 v2, v1, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -53,7 +51,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_dec_rtn_u32 v0, v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -67,8 +64,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_dec_rtn_u32 v0, v0, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -83,8 +79,6 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_mov_b32 v0, s2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_dec_rtn_u32 v0, v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -107,7 +101,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s2 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_dec_rtn_u32 v2, v1, v0 offset:16 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 @@ -123,7 +116,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s2 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_dec_rtn_u32 v2, v1, v0 offset:16 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -138,7 +130,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_dec_rtn_u32 v0, v1, v0 offset:16 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -152,8 +143,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_dec_rtn_u32 v0, v1, v0 offset:16 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -168,8 +158,6 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, s2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_dec_rtn_u32 v0, v1, v0 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -192,7 +180,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_dec_u32 v1, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_endpgm @@ -204,7 +191,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_dec_u32 v1, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_endpgm @@ -215,7 +201,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_dec_u32 v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_endpgm @@ -226,8 +211,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 42 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_dec_u32 v0, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -238,8 +221,6 @@ ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_mov_b32 v0, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_dec_u32 v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -256,7 +237,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_dec_u32 v1, v0 offset:16 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_endpgm @@ -268,7 +248,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_dec_u32 v1, v0 offset:16 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_endpgm @@ -279,7 +258,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_dec_u32 v1, v0 offset:16 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_endpgm @@ -290,8 +268,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_dec_u32 v1, v0 offset:16 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -302,8 +278,6 @@ ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_dec_u32 v1, v0 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -321,7 +295,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -337,7 +310,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -351,7 +323,7 @@ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, v0, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -363,8 +335,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -376,8 +347,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -401,7 +371,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -419,7 +388,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -433,7 +401,7 @@ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -445,8 +413,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -458,8 +425,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -484,7 +450,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -502,7 +467,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -516,7 +480,7 @@ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -528,8 +492,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v0, v1, v0, s[2:3] offset:16 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -541,8 +504,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, v0, s[2:3] offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -565,7 +527,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -578,7 +539,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -589,7 +549,7 @@ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v1, v0, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -600,8 +560,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v1, v0, s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -612,8 +571,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v1, v0, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -633,7 +591,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -648,7 +605,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -659,7 +615,7 @@ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -670,8 +626,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -682,8 +637,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -704,7 +658,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -719,7 +672,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -730,7 +682,7 @@ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -741,8 +693,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v1, v0, s[0:1] offset:16 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -753,8 +704,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -778,7 +728,6 @@ ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v3, v[0:1], v3 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -801,7 +750,6 @@ ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v3, v[0:1], v3 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -817,7 +765,7 @@ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 42 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v1, v0, v1, s[2:3] offset:20 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -829,8 +777,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 42 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v1, v0, v1, s[2:3] offset:20 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -842,8 +789,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v1, v0, v1, s[2:3] offset:20 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -874,7 +820,6 @@ ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_mov_b32_e32 v2, 42 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -892,7 +837,6 @@ ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v2, 42 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -903,7 +847,7 @@ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 42 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, s[0:1] offset:20 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -914,8 +858,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 42 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec v0, v1, s[0:1] offset:20 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -926,8 +869,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u32 v0, v1, s[0:1] offset:20 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -948,7 +890,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -964,7 +905,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -980,7 +920,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -996,8 +935,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1013,8 +950,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1037,7 +972,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1055,7 +989,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1071,7 +1004,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec v2, v[0:1], v2 offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1089,8 +1021,6 @@ ; GFX10-NEXT: s_addc_u32 s3, s3, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1106,8 +1036,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1131,7 +1059,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1149,7 +1076,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1165,7 +1091,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec v2, v[0:1], v2 offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1183,8 +1108,6 @@ ; GFX10-NEXT: s_addc_u32 s3, s3, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1200,8 +1123,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u32 v2, v[0:1], v2 offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1223,7 +1144,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1236,7 +1156,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1249,7 +1168,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec v[0:1], v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1262,8 +1180,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1277,8 +1193,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1299,7 +1213,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1314,7 +1227,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1327,7 +1239,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec v[0:1], v2 offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1342,8 +1253,6 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1357,8 +1266,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1380,7 +1287,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1395,7 +1301,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1408,7 +1313,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec v[0:1], v2 offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1423,8 +1327,6 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1438,8 +1340,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1464,7 +1364,6 @@ ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v3, v[0:1], v3 glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1487,7 +1386,6 @@ ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v3, v[0:1], v3 glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1508,7 +1406,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec v3, v[0:1], v3 offset:20 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1531,8 +1428,6 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 20 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec v3, v[0:1], v3 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1553,8 +1448,6 @@ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u32 v3, v[0:1], v3 offset:20 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1587,7 +1480,6 @@ ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_mov_b32_e32 v2, 42 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1605,7 +1497,6 @@ ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v2, 42 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1621,7 +1512,6 @@ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v2, 42 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec v[0:1], v2 offset:20 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1639,8 +1529,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 42 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 20 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1658,8 +1546,6 @@ ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: v_mov_b32_e32 v2, 42 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u32 v[0:1], v2 offset:20 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1682,7 +1568,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1704,7 +1589,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1726,7 +1610,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1743,8 +1626,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1761,8 +1642,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s2 ; GFX11-NEXT: v_mov_b32_e32 v3, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3], v[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1786,7 +1665,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1810,7 +1688,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1832,7 +1709,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1851,8 +1727,6 @@ ; GFX10-NEXT: s_addc_u32 s3, s3, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1869,8 +1743,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s2 ; GFX11-NEXT: v_mov_b32_e32 v3, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3], v[0:1] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1893,7 +1765,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1907,7 +1778,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1921,7 +1791,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1935,8 +1804,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1951,8 +1818,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u64 v[2:3], v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1974,7 +1839,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1990,7 +1854,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2004,7 +1867,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2020,8 +1882,6 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2036,8 +1896,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2060,7 +1918,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2076,7 +1933,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2090,7 +1946,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2106,8 +1961,6 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2122,8 +1975,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2149,7 +2000,6 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2176,7 +2026,6 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2201,7 +2050,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v3, s2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[3:4], v[1:2] offset:40 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2225,8 +2073,6 @@ ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 40 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2249,8 +2095,6 @@ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u64 v[0:1], v[0:1], v[2:3] offset:40 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2284,7 +2128,6 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2303,7 +2146,6 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2320,7 +2162,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_dec_x2 v[3:4], v[1:2] offset:40 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2339,8 +2180,6 @@ ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 40 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2359,8 +2198,6 @@ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_dec_u64 v[0:1], v[2:3] offset:40 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2431,8 +2268,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 9 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 2, v0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_dec_rtn_u32 v1, v1, v2 offset:8 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2446,8 +2282,7 @@ ; GFX11-NEXT: v_dual_mov_b32 v2, 9 :: v_dual_lshlrev_b32 v1, 2, v0 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_add_nc_u32_e32 v0, 2, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ds_dec_rtn_u32 v1, v1, v2 offset:8 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2477,7 +2312,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 @@ -2494,7 +2328,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 @@ -2510,7 +2343,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -2525,8 +2357,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2542,8 +2373,6 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2567,7 +2396,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 @@ -2584,7 +2412,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 @@ -2600,7 +2427,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -2615,8 +2441,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2632,8 +2457,6 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2657,7 +2480,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_dec_u64 v2, v[0:1] ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_endpgm @@ -2670,7 +2492,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_dec_u64 v2, v[0:1] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_endpgm @@ -2682,7 +2503,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_dec_u64 v2, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_endpgm @@ -2694,8 +2514,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_dec_u64 v2, v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2707,8 +2525,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_dec_u64 v2, v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2726,7 +2542,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_dec_u64 v2, v[0:1] offset:32 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_endpgm @@ -2739,7 +2554,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_dec_u64 v2, v[0:1] offset:32 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_endpgm @@ -2751,7 +2565,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_dec_u64 v2, v[0:1] offset:32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_endpgm @@ -2763,8 +2576,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_dec_u64 v2, v[0:1] offset:32 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2776,8 +2587,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_dec_u64 v2, v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2796,7 +2605,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2813,7 +2621,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2828,7 +2635,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2841,8 +2648,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2855,8 +2661,7 @@ ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v2, v[0:1], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2881,7 +2686,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2900,7 +2704,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2915,7 +2718,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2928,8 +2731,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2942,8 +2744,7 @@ ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2969,7 +2770,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2988,7 +2788,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3003,7 +2802,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3016,8 +2815,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3030,8 +2828,7 @@ ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3055,7 +2852,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3069,7 +2865,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3081,7 +2876,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3093,8 +2888,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -3106,8 +2900,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -3128,7 +2921,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3144,7 +2936,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3156,7 +2947,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3168,8 +2959,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -3181,8 +2971,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -3204,7 +2993,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3220,7 +3008,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3232,7 +3019,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3244,8 +3031,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -3257,8 +3043,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -3283,7 +3068,6 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3307,7 +3091,6 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3324,7 +3107,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 3, v0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3337,8 +3120,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 42 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3351,8 +3133,7 @@ ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v1, 42 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 3, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3384,7 +3165,6 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3403,7 +3183,6 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3415,7 +3194,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v0, v[1:2], s[0:1] offset:40 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3427,8 +3206,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 42 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_dec_x2 v0, v[1:2], s[0:1] offset:40 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -3440,8 +3218,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_lshlrev_b32 v0, 3, v0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_dec_u64 v0, v[1:2], s[0:1] offset:40 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -3515,8 +3292,7 @@ ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v0 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 2, v0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_dec_rtn_u64 v[1:2], v3, v[1:2] offset:16 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3531,8 +3307,7 @@ ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 3, v0 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_add_nc_u32_e32 v0, 2, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ds_dec_rtn_u64 v[1:2], v3, v[1:2] offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll @@ -22,7 +22,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s2 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_rtn_u32 v2, v1, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 @@ -38,7 +37,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s2 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_rtn_u32 v2, v1, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -53,7 +51,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_rtn_u32 v0, v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -67,8 +64,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_inc_rtn_u32 v0, v0, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -83,8 +79,6 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_mov_b32 v0, s2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_inc_rtn_u32 v0, v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -107,7 +101,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s2 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_rtn_u32 v2, v1, v0 offset:16 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 @@ -123,7 +116,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s2 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_rtn_u32 v2, v1, v0 offset:16 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -138,7 +130,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_rtn_u32 v0, v1, v0 offset:16 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -152,8 +143,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_inc_rtn_u32 v0, v1, v0 offset:16 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -168,8 +158,6 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, s2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_inc_rtn_u32 v0, v1, v0 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -192,7 +180,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_u32 v1, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_endpgm @@ -204,7 +191,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_u32 v1, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_endpgm @@ -215,7 +201,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_u32 v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_endpgm @@ -226,8 +211,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 42 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_inc_u32 v0, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -238,8 +221,6 @@ ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_mov_b32 v0, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_inc_u32 v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -256,7 +237,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_u32 v1, v0 offset:16 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_endpgm @@ -268,7 +248,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_u32 v1, v0 offset:16 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_endpgm @@ -279,7 +258,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_u32 v1, v0 offset:16 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_endpgm @@ -290,8 +268,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_inc_u32 v1, v0 offset:16 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -302,8 +278,6 @@ ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_inc_u32 v1, v0 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -321,7 +295,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -337,7 +310,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -351,7 +323,7 @@ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, v0, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -363,8 +335,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v0, v1, v0, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -376,8 +347,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, v0, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -401,7 +371,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -419,7 +388,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -433,7 +401,7 @@ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, v0, s[2:3] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -445,8 +413,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v0, v1, v0, s[2:3] offset:16 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -458,8 +425,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, v0, s[2:3] offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -484,7 +450,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -502,7 +467,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -516,7 +480,7 @@ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, v0, s[2:3] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -528,8 +492,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v0, v1, v0, s[2:3] offset:16 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -541,8 +504,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, v0, s[2:3] offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -565,7 +527,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -578,7 +539,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -589,7 +549,7 @@ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v1, v0, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -600,8 +560,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v1, v0, s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -612,8 +571,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -633,7 +591,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -648,7 +605,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -659,7 +615,7 @@ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -670,8 +626,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -682,8 +637,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -704,7 +658,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -719,7 +672,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -730,7 +682,7 @@ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -741,8 +693,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -753,8 +704,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -778,7 +728,6 @@ ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v3, v[0:1], v3 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -801,7 +750,6 @@ ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v3, v[0:1], v3 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -817,7 +765,7 @@ ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 42 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v1, v0, v1, s[2:3] offset:20 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -829,8 +777,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 42 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v1, v0, v1, s[2:3] offset:20 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -842,8 +789,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, v1, s[2:3] offset:20 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -874,7 +820,6 @@ ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_mov_b32_e32 v2, 42 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -892,7 +837,6 @@ ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v2, 42 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -903,7 +847,7 @@ ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 42 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, s[0:1] offset:20 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -914,8 +858,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 42 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc v0, v1, s[0:1] offset:20 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -926,8 +869,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_lshlrev_b32 v0, 2, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u32 v0, v1, s[0:1] offset:20 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -997,8 +939,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 9 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 2, v0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_inc_rtn_u32 v1, v1, v2 offset:8 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1012,8 +953,7 @@ ; GFX11-NEXT: v_dual_mov_b32 v2, 9 :: v_dual_lshlrev_b32 v1, 2, v0 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_add_nc_u32_e32 v0, 2, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ds_inc_rtn_u32 v1, v1, v2 offset:8 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1043,7 +983,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 @@ -1060,7 +999,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 @@ -1076,7 +1014,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -1091,8 +1028,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1108,8 +1044,6 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1133,7 +1067,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 @@ -1150,7 +1083,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 @@ -1166,7 +1098,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -1181,8 +1112,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1198,8 +1128,6 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1223,7 +1151,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_u64 v2, v[0:1] ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_endpgm @@ -1236,7 +1163,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_u64 v2, v[0:1] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_endpgm @@ -1248,7 +1174,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_u64 v2, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_endpgm @@ -1260,8 +1185,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_inc_u64 v2, v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1273,8 +1196,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_inc_u64 v2, v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1292,7 +1213,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_endpgm @@ -1305,7 +1225,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_endpgm @@ -1317,7 +1236,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_endpgm @@ -1329,8 +1247,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1342,8 +1258,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1362,7 +1276,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1379,7 +1292,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1394,7 +1306,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1407,8 +1319,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1421,8 +1332,7 @@ ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v2, v[0:1], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1447,7 +1357,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1466,7 +1375,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1481,7 +1389,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1494,8 +1402,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1508,8 +1415,7 @@ ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1535,7 +1441,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1554,7 +1459,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1569,7 +1473,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1582,8 +1486,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1596,8 +1499,7 @@ ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v2, v[0:1], s[2:3] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1621,7 +1523,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1635,7 +1536,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1647,7 +1547,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1659,8 +1559,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1672,8 +1571,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1694,7 +1592,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1710,7 +1607,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1722,7 +1618,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1734,8 +1630,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1747,8 +1642,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1770,7 +1664,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1786,7 +1679,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1798,7 +1690,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1810,8 +1702,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1823,8 +1714,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1849,7 +1739,6 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1873,7 +1762,6 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1890,7 +1778,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 3, v0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1903,8 +1791,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 42 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1917,8 +1804,7 @@ ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v1, 42 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 3, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v[0:1], v3, v[1:2], s[2:3] offset:40 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1950,7 +1836,6 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1969,7 +1854,6 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1981,7 +1865,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v0, v[1:2], s[0:1] offset:40 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1993,8 +1877,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 42 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_atomic_inc_x2 v0, v[1:2], s[0:1] offset:40 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2006,8 +1889,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_lshlrev_b32 v0, 3, v0 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_inc_u64 v0, v[1:2], s[0:1] offset:40 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2028,7 +1910,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2044,7 +1925,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2060,7 +1940,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2076,8 +1955,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2093,8 +1970,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2117,7 +1992,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2135,7 +2009,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2151,7 +2024,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc v2, v[0:1], v2 offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2169,8 +2041,6 @@ ; GFX10-NEXT: s_addc_u32 s3, s3, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2186,8 +2056,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2211,7 +2079,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2229,7 +2096,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2245,7 +2111,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc v2, v[0:1], v2 offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2263,8 +2128,6 @@ ; GFX10-NEXT: s_addc_u32 s3, s3, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2280,8 +2143,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u32 v2, v[0:1], v2 offset:16 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2303,7 +2164,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2316,7 +2176,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2329,7 +2188,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc v[0:1], v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2342,8 +2200,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2357,8 +2213,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2379,7 +2233,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2394,7 +2247,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2407,7 +2259,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc v[0:1], v2 offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2422,8 +2273,6 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2437,8 +2286,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2460,7 +2307,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2475,7 +2321,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2488,7 +2333,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc v[0:1], v2 offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2503,8 +2347,6 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2518,8 +2360,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2544,7 +2384,6 @@ ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v3, v[0:1], v3 glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2567,7 +2406,6 @@ ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v3, v[0:1], v3 glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2588,7 +2426,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc v3, v[0:1], v3 offset:20 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2611,8 +2448,6 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 20 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc v3, v[0:1], v3 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2633,8 +2468,6 @@ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u32 v3, v[0:1], v3 offset:20 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2667,7 +2500,6 @@ ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_mov_b32_e32 v2, 42 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2685,7 +2517,6 @@ ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v2, 42 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2701,7 +2532,6 @@ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v2, 42 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc v[0:1], v2 offset:20 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2719,8 +2549,6 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 42 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 20 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2738,8 +2566,6 @@ ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: v_mov_b32_e32 v2, 42 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:20 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2814,8 +2640,7 @@ ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v0 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 2, v0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_inc_rtn_u64 v[1:2], v3, v[1:2] offset:16 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2830,8 +2655,7 @@ ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 3, v0 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-NEXT: v_add_nc_u32_e32 v0, 2, v0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ds_inc_rtn_u64 v[1:2], v3, v[1:2] offset:16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2860,7 +2684,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2882,7 +2705,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2904,7 +2726,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2921,8 +2742,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2939,8 +2758,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s2 ; GFX11-NEXT: v_mov_b32_e32 v3, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2964,7 +2781,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -2988,7 +2804,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3010,7 +2825,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3029,8 +2843,6 @@ ; GFX10-NEXT: s_addc_u32 s3, s3, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3047,8 +2859,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s2 ; GFX11-NEXT: v_mov_b32_e32 v3, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3073,7 +2883,6 @@ ; CI-NEXT: s_addc_u32 s3, s3, 0 ; CI-NEXT: v_mov_b32_e32 v2, s2 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3097,7 +2906,6 @@ ; VI-NEXT: s_addc_u32 s3, s3, 0 ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3119,7 +2927,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3138,8 +2945,6 @@ ; GFX10-NEXT: s_addc_u32 s3, s3, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3156,8 +2961,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s2 ; GFX11-NEXT: v_mov_b32_e32 v3, s3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3], v[0:1] offset:32 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3180,7 +2983,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3194,7 +2996,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3208,7 +3009,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3222,8 +3022,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3238,8 +3036,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3261,7 +3057,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3277,7 +3072,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3291,7 +3085,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3307,8 +3100,6 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3323,8 +3114,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3347,7 +3136,6 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3363,7 +3151,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3377,7 +3164,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3393,8 +3179,6 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3409,8 +3193,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3436,7 +3218,6 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3463,7 +3244,6 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3488,7 +3268,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v3, s2 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[3:4], v[1:2] offset:40 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3512,8 +3291,6 @@ ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 40 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3536,8 +3313,6 @@ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[0:1], v[2:3] offset:40 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3571,7 +3346,6 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3590,7 +3364,6 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3607,7 +3380,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_atomic_inc_x2 v[3:4], v[1:2] offset:40 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3626,8 +3398,6 @@ ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 40 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3646,8 +3416,6 @@ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3] offset:40 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3670,7 +3438,6 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s6 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_rtn_u32 v2, v1, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: ds_inc_rtn_u32 v3, v1, v0 @@ -3691,7 +3458,6 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s6 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_rtn_u32 v2, v1, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_inc_rtn_u32 v3, v1, v0 @@ -3711,7 +3477,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_rtn_u32 v2, v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_inc_rtn_u32 v0, v0, v1 @@ -3728,13 +3493,10 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_inc_rtn_u32 v2, v0, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: ds_inc_rtn_u32 v0, v0, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3749,13 +3511,10 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 42 :: v_dual_mov_b32 v0, s2 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ds_inc_rtn_u32 v2, v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ds_inc_rtn_u32 v0, v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll @@ -32,7 +32,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -49,7 +48,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -75,7 +73,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -191,9 +188,8 @@ ; GFX940-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-NEXT: v_mov_b32_e32 v1, s0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: ds_pk_add_bf16 v1, v0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_endpgm %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) @@ -205,9 +201,8 @@ ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_setpc_b64 s[30:31] %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -1104,7 +1104,6 @@ ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1123,7 +1122,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1140,7 +1139,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1152,7 +1151,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1176,7 +1175,6 @@ ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1195,7 +1193,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1212,7 +1210,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1224,7 +1222,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1265,7 +1263,6 @@ ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1285,7 +1282,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1301,7 +1297,6 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1312,7 +1307,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1334,7 +1328,6 @@ ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1354,7 +1347,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1415,7 +1407,6 @@ ; GFX90A-NEXT: .LBB49_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1433,7 +1424,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1457,7 +1448,6 @@ ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1477,7 +1467,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1495,7 +1484,6 @@ ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1508,7 +1496,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1532,7 +1519,6 @@ ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1553,7 +1539,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1575,7 +1560,6 @@ ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1595,7 +1579,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1611,7 +1594,6 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1622,7 +1604,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1644,7 +1625,6 @@ ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1665,7 +1645,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1733,7 +1712,6 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1752,7 +1730,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1906,7 +1883,6 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, s0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm @@ -1917,7 +1893,6 @@ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: ds_add_f64 v2, v[0:1] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_endpgm @@ -1934,7 +1909,6 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, s0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm @@ -1945,7 +1919,6 @@ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: ds_add_f64 v2, v[0:1] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_endpgm @@ -1967,7 +1940,6 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[2:3], v[0:1], 4.0 ; GFX90A-NEXT: v_mov_b32_e32 v4, s2 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: ds_cmpst_rtn_b64 v[2:3], v4, v[0:1], v[2:3] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[0:1] @@ -1990,7 +1962,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_add_f64 v[2:3], v[0:1], 4.0 ; GFX940-NEXT: v_mov_b32_e32 v4, s2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: ds_cmpst_rtn_b64 v[2:3], v4, v[0:1], v[2:3] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[0:1] @@ -2011,7 +1982,6 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] @@ -2020,7 +1990,6 @@ ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll @@ -906,7 +906,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -921,7 +920,6 @@ ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 @@ -944,7 +942,6 @@ ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b32 s2, s4 ; GFX6-NEXT: v_mov_b32_e32 v2, s5 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -962,7 +959,6 @@ ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, s4 ; GFX7-NEXT: v_mov_b32_e32 v2, s5 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 @@ -981,7 +977,6 @@ ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b64 s[0:1], 0 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -996,7 +991,6 @@ ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 @@ -1016,7 +1010,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v2, 2 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b32 s2, s0 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1031,7 +1024,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, 2 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 @@ -1053,7 +1045,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v2, 2 ; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1070,7 +1061,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, 2 ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 @@ -1091,7 +1081,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1107,7 +1096,6 @@ ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], off, s[0:3], s4 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 @@ -1132,7 +1120,6 @@ ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b32 s2, s4 ; GFX6-NEXT: v_mov_b32_e32 v4, s5 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1151,7 +1138,6 @@ ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, s4 ; GFX7-NEXT: v_mov_b32_e32 v4, s5 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_cmpswap v[1:2], v[3:4], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 @@ -1172,7 +1158,6 @@ ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b64 s[0:1], 0 ; GFX6-NEXT: s_movk_i32 s4, 0x3ffc -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1187,7 +1172,6 @@ ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 @@ -1208,7 +1192,6 @@ ; GFX6-NEXT: s_mov_b32 s1, 4 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b32 s2, s0 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1223,7 +1206,6 @@ ; GFX7-NEXT: s_mov_b32 s1, 4 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 @@ -1246,7 +1228,6 @@ ; GFX6-NEXT: s_mov_b32 s1, s3 ; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1263,7 +1244,6 @@ ; GFX7-NEXT: s_mov_b32 s1, s3 ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_atomic_cmpswap v[2:3], v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll @@ -31,7 +31,6 @@ ; GFX7LESS-NEXT: s_mov_b32 s8, s2 ; GFX7LESS-NEXT: s_mov_b32 s9, s3 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, s6 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -64,7 +63,6 @@ ; GFX89-NEXT: s_mov_b32 s10, -1 ; GFX89-NEXT: s_mov_b32 s9, s3 ; GFX89-NEXT: v_mov_b32_e32 v1, s2 -; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX89-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: buffer_wbinvl1_vol @@ -97,8 +95,6 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s8, s2 ; GFX1064-NEXT: s_mov_b32 s9, s3 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -132,8 +128,6 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s8, s2 ; GFX1032-NEXT: s_mov_b32 s9, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -169,8 +163,6 @@ ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_mov_b32 s8, s2 ; GFX1164-NEXT: s_mov_b32 s9, s3 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_add_u32 v1, off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -207,8 +199,6 @@ ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_mov_b32 s8, s2 ; GFX1132-NEXT: s_mov_b32 s9, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_add_u32 v1, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -252,7 +242,6 @@ ; GFX7LESS-NEXT: s_mov_b32 s12, s6 ; GFX7LESS-NEXT: s_mov_b32 s13, s7 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, s2 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_add v1, off, s[12:15], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -287,7 +276,6 @@ ; GFX8-NEXT: s_mov_b32 s12, s6 ; GFX8-NEXT: s_mov_b32 s13, s7 ; GFX8-NEXT: v_mov_b32_e32 v1, s2 -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_atomic_add v1, off, s[12:15], 0 glc ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_wbinvl1_vol @@ -322,7 +310,6 @@ ; GFX9-NEXT: s_mov_b32 s12, s6 ; GFX9-NEXT: s_mov_b32 s13, s7 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_atomic_add v1, off, s[12:15], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -358,8 +345,6 @@ ; GFX1064-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064-NEXT: s_mov_b32 s12, s6 ; GFX1064-NEXT: s_mov_b32 s13, s7 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_add v1, off, s[12:15], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -395,8 +380,6 @@ ; GFX1032-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032-NEXT: s_mov_b32 s8, s6 ; GFX1032-NEXT: s_mov_b32 s9, s7 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_add v1, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -434,8 +417,6 @@ ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-NEXT: s_mov_b32 s12, s6 ; GFX1164-NEXT: s_mov_b32 s13, s7 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_add_u32 v1, off, s[12:15], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -474,8 +455,6 @@ ; GFX1132-NEXT: v_mov_b32_e32 v1, s2 ; GFX1132-NEXT: s_mov_b32 s8, s6 ; GFX1132-NEXT: s_mov_b32 s9, s7 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_add_u32 v1, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -509,7 +488,6 @@ ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s8, s2 ; GFX7LESS-NEXT: s_mov_b32 s9, s3 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_add v0, off, s[8:11], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -553,7 +531,6 @@ ; GFX8-NEXT: s_mov_b32 s8, s2 ; GFX8-NEXT: s_mov_b32 s9, s3 ; GFX8-NEXT: v_mov_b32_e32 v0, s6 -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_atomic_add v0, off, s[8:11], 0 glc ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_wbinvl1_vol @@ -602,7 +579,6 @@ ; GFX9-NEXT: s_mov_b32 s8, s2 ; GFX9-NEXT: s_mov_b32 s9, s3 ; GFX9-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_atomic_add v0, off, s[8:11], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -650,8 +626,6 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s8, s2 ; GFX1064-NEXT: s_mov_b32 s9, s3 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_add v0, off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -697,8 +671,6 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s8, s2 ; GFX1032-NEXT: s_mov_b32 s9, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_add v0, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -752,8 +724,6 @@ ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_mov_b32 s8, s2 ; GFX1164-NEXT: s_mov_b32 s9, s3 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_add_u32 v0, off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -803,8 +773,6 @@ ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_mov_b32 s8, s2 ; GFX1132-NEXT: s_mov_b32 s9, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_add_u32 v0, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -849,7 +817,6 @@ ; GFX7LESS-NEXT: s_mov_b32 s9, s3 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, s6 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -889,7 +856,6 @@ ; GFX89-NEXT: s_mov_b32 s9, s3 ; GFX89-NEXT: v_mov_b32_e32 v0, s2 ; GFX89-NEXT: v_mov_b32_e32 v1, 0 -; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX89-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: buffer_wbinvl1_vol @@ -927,8 +893,6 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s8, s2 ; GFX1064-NEXT: s_mov_b32 s9, s3 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -964,8 +928,6 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s8, s2 ; GFX1032-NEXT: s_mov_b32 s9, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -1003,8 +965,6 @@ ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_mov_b32 s8, s2 ; GFX1164-NEXT: s_mov_b32 s9, s3 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -1042,8 +1002,6 @@ ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_mov_b32 s8, s2 ; GFX1132-NEXT: s_mov_b32 s9, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -1092,7 +1050,6 @@ ; GFX7LESS-NEXT: s_mul_i32 s6, s0, s6 ; GFX7LESS-NEXT: v_add_i32_e32 v1, vcc, s7, v0 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, s6 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_add_x2 v[0:1], off, s[12:15], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -1136,7 +1093,6 @@ ; GFX8-NEXT: s_mov_b32 s14, -1 ; GFX8-NEXT: s_mov_b32 s13, s7 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, s6, v1 -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_atomic_add_x2 v[0:1], off, s[12:15], 0 glc ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_wbinvl1_vol @@ -1179,7 +1135,6 @@ ; GFX9-NEXT: s_mov_b32 s14, -1 ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_atomic_add_x2 v[0:1], off, s[12:15], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1222,8 +1177,6 @@ ; GFX1064-NEXT: s_mov_b32 s10, -1 ; GFX1064-NEXT: s_mov_b32 s8, s6 ; GFX1064-NEXT: s_mov_b32 s9, s7 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -1265,8 +1218,6 @@ ; GFX1032-NEXT: s_mov_b32 s10, -1 ; GFX1032-NEXT: s_mov_b32 s8, s6 ; GFX1032-NEXT: s_mov_b32 s9, s7 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -1310,8 +1261,6 @@ ; GFX1164-NEXT: s_mov_b32 s10, -1 ; GFX1164-NEXT: s_mov_b32 s8, s6 ; GFX1164-NEXT: s_mov_b32 s9, s7 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -1358,8 +1307,6 @@ ; GFX1132-NEXT: s_mov_b32 s10, -1 ; GFX1132-NEXT: s_mov_b32 s8, s6 ; GFX1132-NEXT: s_mov_b32 s9, s7 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -1398,7 +1345,6 @@ ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s8, s2 ; GFX7LESS-NEXT: s_mov_b32 s9, s3 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -1418,7 +1364,6 @@ ; GFX89-NEXT: s_mov_b32 s8, s2 ; GFX89-NEXT: s_mov_b32 s9, s3 ; GFX89-NEXT: v_mov_b32_e32 v1, 0 -; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX89-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: buffer_wbinvl1_vol @@ -1439,8 +1384,6 @@ ; GFX10-NEXT: s_mov_b32 s8, s2 ; GFX10-NEXT: s_mov_b32 s9, s3 ; GFX10-NEXT: s_mov_b32 s4, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_atomic_add_x2 v[0:1], off, s[8:11], 0 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1461,8 +1404,6 @@ ; GFX11-NEXT: s_mov_b32 s8, s2 ; GFX11-NEXT: s_mov_b32 s9, s3 ; GFX11-NEXT: s_mov_b32 s4, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], 0 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1500,7 +1441,6 @@ ; GFX7LESS-NEXT: s_mov_b32 s8, s2 ; GFX7LESS-NEXT: s_mov_b32 s9, s3 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, s6 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -1534,7 +1474,6 @@ ; GFX8-NEXT: s_mov_b32 s10, -1 ; GFX8-NEXT: s_mov_b32 s9, s3 ; GFX8-NEXT: v_mov_b32_e32 v1, s2 -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_wbinvl1_vol @@ -1568,7 +1507,6 @@ ; GFX9-NEXT: s_mov_b32 s10, -1 ; GFX9-NEXT: s_mov_b32 s9, s3 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1602,8 +1540,6 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s8, s2 ; GFX1064-NEXT: s_mov_b32 s9, s3 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -1638,8 +1574,6 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s8, s2 ; GFX1032-NEXT: s_mov_b32 s9, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -1676,8 +1610,6 @@ ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_mov_b32 s8, s2 ; GFX1164-NEXT: s_mov_b32 s9, s3 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_sub_u32 v1, off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -1715,8 +1647,6 @@ ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_mov_b32 s8, s2 ; GFX1132-NEXT: s_mov_b32 s9, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_sub_u32 v1, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -1761,7 +1691,6 @@ ; GFX7LESS-NEXT: s_mov_b32 s12, s6 ; GFX7LESS-NEXT: s_mov_b32 s13, s7 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, s2 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_sub v1, off, s[12:15], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -1796,7 +1725,6 @@ ; GFX8-NEXT: s_mov_b32 s12, s6 ; GFX8-NEXT: s_mov_b32 s13, s7 ; GFX8-NEXT: v_mov_b32_e32 v1, s2 -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_atomic_sub v1, off, s[12:15], 0 glc ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_wbinvl1_vol @@ -1831,7 +1759,6 @@ ; GFX9-NEXT: s_mov_b32 s12, s6 ; GFX9-NEXT: s_mov_b32 s13, s7 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_atomic_sub v1, off, s[12:15], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1867,8 +1794,6 @@ ; GFX1064-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064-NEXT: s_mov_b32 s12, s6 ; GFX1064-NEXT: s_mov_b32 s13, s7 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_sub v1, off, s[12:15], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -1905,8 +1830,6 @@ ; GFX1032-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032-NEXT: s_mov_b32 s8, s6 ; GFX1032-NEXT: s_mov_b32 s9, s7 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_sub v1, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -1945,8 +1868,6 @@ ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164-NEXT: s_mov_b32 s12, s6 ; GFX1164-NEXT: s_mov_b32 s13, s7 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_sub_u32 v1, off, s[12:15], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -1986,8 +1907,6 @@ ; GFX1132-NEXT: v_mov_b32_e32 v1, s2 ; GFX1132-NEXT: s_mov_b32 s8, s6 ; GFX1132-NEXT: s_mov_b32 s9, s7 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_sub_u32 v1, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -2022,7 +1941,6 @@ ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s8, s2 ; GFX7LESS-NEXT: s_mov_b32 s9, s3 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_sub v0, off, s[8:11], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -2066,7 +1984,6 @@ ; GFX8-NEXT: s_mov_b32 s8, s2 ; GFX8-NEXT: s_mov_b32 s9, s3 ; GFX8-NEXT: v_mov_b32_e32 v0, s6 -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_atomic_sub v0, off, s[8:11], 0 glc ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_wbinvl1_vol @@ -2115,7 +2032,6 @@ ; GFX9-NEXT: s_mov_b32 s8, s2 ; GFX9-NEXT: s_mov_b32 s9, s3 ; GFX9-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_atomic_sub v0, off, s[8:11], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2163,8 +2079,6 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s8, s2 ; GFX1064-NEXT: s_mov_b32 s9, s3 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_sub v0, off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -2210,8 +2124,6 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s8, s2 ; GFX1032-NEXT: s_mov_b32 s9, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_sub v0, off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -2265,8 +2177,6 @@ ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_mov_b32 s8, s2 ; GFX1164-NEXT: s_mov_b32 s9, s3 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_sub_u32 v0, off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -2316,8 +2226,6 @@ ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_mov_b32 s8, s2 ; GFX1132-NEXT: s_mov_b32 s9, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_sub_u32 v0, off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -2362,7 +2270,6 @@ ; GFX7LESS-NEXT: s_mov_b32 s9, s3 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, s6 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -2402,7 +2309,6 @@ ; GFX8-NEXT: s_mov_b32 s9, s3 ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_wbinvl1_vol @@ -2441,7 +2347,6 @@ ; GFX9-NEXT: s_mov_b32 s9, s3 ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2480,8 +2385,6 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s8, s2 ; GFX1064-NEXT: s_mov_b32 s9, s3 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -2520,8 +2423,6 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s8, s2 ; GFX1032-NEXT: s_mov_b32 s9, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -2562,8 +2463,6 @@ ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_mov_b32 s8, s2 ; GFX1164-NEXT: s_mov_b32 s9, s3 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -2604,8 +2503,6 @@ ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_mov_b32 s8, s2 ; GFX1132-NEXT: s_mov_b32 s9, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -2657,7 +2554,6 @@ ; GFX7LESS-NEXT: s_mul_i32 s6, s0, s6 ; GFX7LESS-NEXT: v_add_i32_e32 v1, vcc, s7, v0 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, s6 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[12:15], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -2701,7 +2597,6 @@ ; GFX8-NEXT: s_mov_b32 s14, -1 ; GFX8-NEXT: s_mov_b32 s13, s7 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, s6, v1 -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[12:15], 0 glc ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_wbinvl1_vol @@ -2745,7 +2640,6 @@ ; GFX9-NEXT: s_mov_b32 s14, -1 ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[12:15], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2790,8 +2684,6 @@ ; GFX1064-NEXT: s_mov_b32 s10, -1 ; GFX1064-NEXT: s_mov_b32 s8, s6 ; GFX1064-NEXT: s_mov_b32 s9, s7 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX1064-NEXT: s_waitcnt vmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -2836,8 +2728,6 @@ ; GFX1032-NEXT: s_mov_b32 s10, -1 ; GFX1032-NEXT: s_mov_b32 s8, s6 ; GFX1032-NEXT: s_mov_b32 s9, s7 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX1032-NEXT: s_waitcnt vmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -2884,8 +2774,6 @@ ; GFX1164-NEXT: s_mov_b32 s10, -1 ; GFX1164-NEXT: s_mov_b32 s8, s6 ; GFX1164-NEXT: s_mov_b32 s9, s7 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX1164-NEXT: s_waitcnt vmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -2934,8 +2822,6 @@ ; GFX1132-NEXT: s_mov_b32 s10, -1 ; GFX1132-NEXT: s_mov_b32 s8, s6 ; GFX1132-NEXT: s_mov_b32 s9, s7 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX1132-NEXT: s_waitcnt vmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -2976,7 +2862,6 @@ ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_mov_b32 s8, s2 ; GFX7LESS-NEXT: s_mov_b32 s9, s3 -; GFX7LESS-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7LESS-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: buffer_wbinvl1 @@ -2996,7 +2881,6 @@ ; GFX89-NEXT: s_mov_b32 s8, s2 ; GFX89-NEXT: s_mov_b32 s9, s3 ; GFX89-NEXT: v_mov_b32_e32 v1, 0 -; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX89-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX89-NEXT: s_waitcnt vmcnt(0) ; GFX89-NEXT: buffer_wbinvl1_vol @@ -3017,8 +2901,6 @@ ; GFX10-NEXT: s_mov_b32 s8, s2 ; GFX10-NEXT: s_mov_b32 s9, s3 ; GFX10-NEXT: s_mov_b32 s4, s0 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[8:11], 0 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3039,8 +2921,6 @@ ; GFX11-NEXT: s_mov_b32 s8, s2 ; GFX11-NEXT: s_mov_b32 s9, s3 ; GFX11-NEXT: s_mov_b32 s4, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], 0 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -32,7 +32,6 @@ ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB0_2: @@ -61,7 +60,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB0_2: @@ -89,7 +87,6 @@ ; GFX9-NEXT: s_mul_i32 s4, s4, 5 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB0_2: @@ -117,8 +114,6 @@ ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: s_mul_i32 s4, s4, 5 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -147,8 +142,6 @@ ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: s_mul_i32 s3, s3, 5 ; GFX1032-NEXT: v_mov_b32_e32 v2, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -180,8 +173,6 @@ ; GFX1164-NEXT: s_mul_i32 s4, s4, 5 ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -213,8 +204,6 @@ ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: s_mul_i32 s3, s3, 5 ; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -257,7 +246,6 @@ ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB1_2: @@ -289,7 +277,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB1_2: @@ -320,7 +307,6 @@ ; GFX9-NEXT: s_mul_i32 s4, s6, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB1_2: @@ -351,8 +337,6 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mul_i32 s4, s6, s4 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -383,8 +367,6 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mul_i32 s4, s2, s4 ; GFX1032-NEXT: v_mov_b32_e32 v2, s4 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -418,8 +400,6 @@ ; GFX1164-NEXT: s_mul_i32 s4, s6, s4 ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -453,8 +433,6 @@ ; GFX1132-NEXT: s_mul_i32 s4, s2, s4 ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s4 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_add_rtn_u32 v1, v1, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -524,7 +502,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_add_rtn_u32 v0, v0, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB2_4: @@ -568,7 +545,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_u32 v0, v0, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB2_4: @@ -611,8 +587,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_add_rtn_u32 v0, v0, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -653,8 +627,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, s2 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_add_rtn_u32 v0, v0, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -703,8 +675,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_add_rtn_u32 v0, v0, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -749,8 +719,6 @@ ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: v_mov_b32_e32 v2, s2 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_add_rtn_u32 v0, v0, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -779,7 +747,6 @@ ; GFX7LESS: ; %bb.0: ; %entry ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_add_u32 v1, v0 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_endpgm @@ -811,7 +778,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_add_u32 v0, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB3_4: @@ -843,7 +809,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_u32 v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB3_4: @@ -875,8 +840,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_add_u32 v0, v1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -905,8 +868,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v1, s0 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_add_u32 v0, v1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -942,8 +903,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_add_u32 v0, v1 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -974,8 +933,6 @@ ; GFX1132-NEXT: s_cbranch_execz .LBB3_4 ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_add_u32 v0, v1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -1005,7 +962,6 @@ ; GFX7LESS-NEXT: s_mul_i32 s4, s4, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, s4 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_add_rtn_u64 v[0:1], v2, v[1:2] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB4_2: @@ -1039,7 +995,6 @@ ; GFX8-NEXT: s_mul_i32 s4, s4, 5 ; GFX8-NEXT: v_mov_b32_e32 v1, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_add_rtn_u64 v[0:1], v2, v[1:2] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB4_2: @@ -1072,7 +1027,6 @@ ; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX9-NEXT: s_mul_i32 s4, s4, 5 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_u64 v[0:1], v2, v[1:2] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB4_2: @@ -1105,8 +1059,6 @@ ; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1064-NEXT: s_mul_i32 s4, s4, 5 ; GFX1064-NEXT: v_mov_b32_e32 v1, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_add_rtn_u64 v[0:1], v2, v[1:2] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -1136,8 +1088,6 @@ ; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3 ; GFX1032-NEXT: s_mul_i32 s3, s3, 5 ; GFX1032-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_add_rtn_u64 v[0:1], v2, v[1:2] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -1170,8 +1120,6 @@ ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: s_mul_i32 s4, s4, 5 ; GFX1164-NEXT: v_mov_b32_e32 v1, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_add_rtn_u64 v[0:1], v2, v[1:2] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -1205,8 +1153,6 @@ ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: s_mul_i32 s3, s3, 5 ; GFX1132-NEXT: v_mov_b32_e32 v1, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_add_rtn_u64 v[0:1], v2, v[1:2] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -1254,7 +1200,6 @@ ; GFX7LESS-NEXT: v_add_i32_e32 v1, vcc, s7, v0 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, s6 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_add_rtn_u64 v[0:1], v3, v[0:1] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB5_2: @@ -1295,7 +1240,6 @@ ; GFX8-NEXT: s_mul_i32 s6, s3, s8 ; GFX8-NEXT: s_mov_b32 m0, -1 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, s6, v1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_add_rtn_u64 v[0:1], v3, v[0:1] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB5_2: @@ -1335,7 +1279,6 @@ ; GFX9-NEXT: s_mul_i32 s6, s2, s6 ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_u64 v[0:1], v3, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB5_2: @@ -1374,8 +1317,6 @@ ; GFX1064-NEXT: s_add_i32 s8, s8, s7 ; GFX1064-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064-NEXT: v_mov_b32_e32 v1, s8 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_add_rtn_u64 v[0:1], v3, v[0:1] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -1411,8 +1352,6 @@ ; GFX1032-NEXT: s_add_i32 s7, s7, s6 ; GFX1032-NEXT: v_mov_b32_e32 v0, s5 ; GFX1032-NEXT: v_mov_b32_e32 v1, s7 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_add_rtn_u64 v[0:1], v3, v[0:1] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -1450,8 +1389,6 @@ ; GFX1164-NEXT: s_add_i32 s8, s8, s7 ; GFX1164-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164-NEXT: v_mov_b32_e32 v1, s8 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_add_rtn_u64 v[0:1], v3, v[0:1] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -1492,8 +1429,6 @@ ; GFX1132-NEXT: s_add_i32 s7, s7, s6 ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s7 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_add_rtn_u64 v[0:1], v3, v[0:1] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -1566,8 +1501,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; GFX10-NEXT: s_mov_b32 s2, -1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_add_rtn_u64 v[0:1], v1, v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1580,8 +1514,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ds_add_rtn_u64 v[0:1], v1, v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1615,7 +1548,6 @@ ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB7_2: @@ -1645,7 +1577,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB7_2: @@ -1674,7 +1605,6 @@ ; GFX9-NEXT: s_mul_i32 s4, s4, 5 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB7_2: @@ -1703,8 +1633,6 @@ ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: s_mul_i32 s4, s4, 5 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -1734,8 +1662,6 @@ ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: s_mul_i32 s3, s3, 5 ; GFX1032-NEXT: v_mov_b32_e32 v2, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -1768,8 +1694,6 @@ ; GFX1164-NEXT: s_mul_i32 s4, s4, 5 ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -1802,8 +1726,6 @@ ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: s_mul_i32 s3, s3, 5 ; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -1847,7 +1769,6 @@ ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB8_2: @@ -1879,7 +1800,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB8_2: @@ -1910,7 +1830,6 @@ ; GFX9-NEXT: s_mul_i32 s4, s6, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB8_2: @@ -1941,8 +1860,6 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mul_i32 s4, s6, s4 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -1974,8 +1891,6 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mul_i32 s4, s2, s4 ; GFX1032-NEXT: v_mov_b32_e32 v2, s4 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -2010,8 +1925,6 @@ ; GFX1164-NEXT: s_mul_i32 s4, s6, s4 ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -2046,8 +1959,6 @@ ; GFX1132-NEXT: s_mul_i32 s4, s2, s4 ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s4 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_sub_rtn_u32 v1, v1, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -2118,7 +2029,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_sub_rtn_u32 v0, v0, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB9_4: @@ -2162,7 +2072,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_sub_rtn_u32 v0, v0, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB9_4: @@ -2205,8 +2114,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_sub_rtn_u32 v0, v0, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -2247,8 +2154,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, s2 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_sub_rtn_u32 v0, v0, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -2297,8 +2202,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_sub_rtn_u32 v0, v0, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -2343,8 +2246,6 @@ ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: v_mov_b32_e32 v2, s2 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_sub_rtn_u32 v0, v0, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -2373,7 +2274,6 @@ ; GFX7LESS: ; %bb.0: ; %entry ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_sub_u32 v1, v0 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_endpgm @@ -2405,7 +2305,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_sub_u32 v0, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB10_4: @@ -2437,7 +2336,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_sub_u32 v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB10_4: @@ -2469,8 +2367,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_sub_u32 v0, v1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -2499,8 +2395,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v1, s0 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_sub_u32 v0, v1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -2536,8 +2430,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_sub_u32 v0, v1 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -2568,8 +2460,6 @@ ; GFX1132-NEXT: s_cbranch_execz .LBB10_4 ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_sub_u32 v0, v1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -2599,7 +2489,6 @@ ; GFX7LESS-NEXT: s_mul_i32 s4, s4, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, s4 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_sub_rtn_u64 v[0:1], v2, v[1:2] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB11_2: @@ -2633,7 +2522,6 @@ ; GFX8-NEXT: s_mul_i32 s4, s4, 5 ; GFX8-NEXT: v_mov_b32_e32 v1, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_sub_rtn_u64 v[0:1], v2, v[1:2] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB11_2: @@ -2666,7 +2554,6 @@ ; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX9-NEXT: s_mul_i32 s4, s4, 5 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_sub_rtn_u64 v[0:1], v2, v[1:2] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB11_2: @@ -2699,8 +2586,6 @@ ; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1064-NEXT: s_mul_i32 s4, s4, 5 ; GFX1064-NEXT: v_mov_b32_e32 v1, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_sub_rtn_u64 v[0:1], v2, v[1:2] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -2733,8 +2618,6 @@ ; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3 ; GFX1032-NEXT: s_mul_i32 s3, s3, 5 ; GFX1032-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_sub_rtn_u64 v[0:1], v2, v[1:2] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -2770,8 +2653,6 @@ ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: s_mul_i32 s4, s4, 5 ; GFX1164-NEXT: v_mov_b32_e32 v1, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_sub_rtn_u64 v[0:1], v2, v[1:2] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -2808,8 +2689,6 @@ ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: s_mul_i32 s3, s3, 5 ; GFX1132-NEXT: v_mov_b32_e32 v1, s3 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_sub_rtn_u64 v[0:1], v2, v[1:2] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -2860,7 +2739,6 @@ ; GFX7LESS-NEXT: v_add_i32_e32 v1, vcc, s7, v0 ; GFX7LESS-NEXT: v_mov_b32_e32 v0, s6 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_sub_rtn_u64 v[0:1], v3, v[0:1] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB12_2: @@ -2901,7 +2779,6 @@ ; GFX8-NEXT: s_mul_i32 s6, s3, s8 ; GFX8-NEXT: s_mov_b32 m0, -1 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, s6, v1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_sub_rtn_u64 v[0:1], v3, v[0:1] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB12_2: @@ -2942,7 +2819,6 @@ ; GFX9-NEXT: s_mul_i32 s6, s2, s6 ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_sub_rtn_u64 v[0:1], v3, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB12_2: @@ -2983,8 +2859,6 @@ ; GFX1064-NEXT: s_add_i32 s8, s8, s7 ; GFX1064-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064-NEXT: v_mov_b32_e32 v1, s8 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_sub_rtn_u64 v[0:1], v3, v[0:1] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -3023,8 +2897,6 @@ ; GFX1032-NEXT: s_add_i32 s7, s7, s6 ; GFX1032-NEXT: v_mov_b32_e32 v0, s5 ; GFX1032-NEXT: v_mov_b32_e32 v1, s7 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_sub_rtn_u64 v[0:1], v3, v[0:1] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -3065,8 +2937,6 @@ ; GFX1164-NEXT: s_add_i32 s8, s8, s7 ; GFX1164-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164-NEXT: v_mov_b32_e32 v1, s8 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_sub_rtn_u64 v[0:1], v3, v[0:1] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -3109,8 +2979,6 @@ ; GFX1132-NEXT: s_add_i32 s7, s7, s6 ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s7 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_sub_rtn_u64 v[0:1], v3, v[0:1] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -3185,8 +3053,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; GFX10-NEXT: s_mov_b32 s2, -1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ds_sub_rtn_u64 v[0:1], v1, v[0:1] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3199,8 +3066,7 @@ ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; GFX11-NEXT: s_mov_b32 s2, -1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ds_sub_rtn_u64 v[0:1], v1, v[0:1] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3263,7 +3129,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_and_rtn_b32 v0, v0, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB14_4: @@ -3307,7 +3172,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_and_rtn_b32 v0, v0, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB14_4: @@ -3350,8 +3214,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_and_rtn_b32 v0, v0, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -3392,8 +3254,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, s2 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_and_rtn_b32 v0, v0, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -3442,8 +3302,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_and_rtn_b32 v0, v0, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -3488,8 +3346,6 @@ ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: v_mov_b32_e32 v2, s2 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_and_rtn_b32 v0, v0, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -3560,7 +3416,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_or_rtn_b32 v0, v0, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB15_4: @@ -3604,7 +3459,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_or_rtn_b32 v0, v0, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB15_4: @@ -3647,8 +3501,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_or_rtn_b32 v0, v0, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -3689,8 +3541,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, s2 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_or_rtn_b32 v0, v0, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -3739,8 +3589,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_or_rtn_b32 v0, v0, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -3785,8 +3633,6 @@ ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: v_mov_b32_e32 v2, s2 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_or_rtn_b32 v0, v0, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -3857,7 +3703,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_xor_rtn_b32 v0, v0, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB16_4: @@ -3901,7 +3746,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_xor_rtn_b32 v0, v0, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB16_4: @@ -3944,8 +3788,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_xor_rtn_b32 v0, v0, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -3986,8 +3828,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, s2 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_xor_rtn_b32 v0, v0, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -4036,8 +3876,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_xor_rtn_b32 v0, v0, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -4082,8 +3920,6 @@ ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: v_mov_b32_e32 v2, s2 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_xor_rtn_b32 v0, v0, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -4154,7 +3990,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_max_rtn_i32 v0, v0, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB17_4: @@ -4198,7 +4033,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_max_rtn_i32 v0, v0, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB17_4: @@ -4241,8 +4075,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_max_rtn_i32 v0, v0, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -4283,8 +4115,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, s2 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_max_rtn_i32 v0, v0, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -4333,8 +4163,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_max_rtn_i32 v0, v0, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -4379,8 +4207,6 @@ ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: v_mov_b32_e32 v2, s2 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_max_rtn_i32 v0, v0, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -4420,7 +4246,6 @@ ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB18_2: @@ -4455,7 +4280,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB18_2: @@ -4490,7 +4314,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 5 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB18_2: @@ -4525,8 +4348,6 @@ ; GFX1064-NEXT: v_mov_b32_e32 v0, 5 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -4558,8 +4379,6 @@ ; GFX1032-NEXT: v_mov_b32_e32 v0, 5 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -4593,8 +4412,6 @@ ; GFX1164-NEXT: v_mov_b32_e32 v0, 5 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -4628,8 +4445,6 @@ ; GFX1132-NEXT: ; %bb.1: ; GFX1132-NEXT: v_mov_b32_e32 v0, 5 ; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_max_rtn_i64 v[0:1], v2, v[0:1] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -4704,7 +4519,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_min_rtn_i32 v0, v0, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB19_4: @@ -4748,7 +4562,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_min_rtn_i32 v0, v0, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB19_4: @@ -4791,8 +4604,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_min_rtn_i32 v0, v0, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -4833,8 +4644,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, s2 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_min_rtn_i32 v0, v0, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -4883,8 +4692,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_min_rtn_i32 v0, v0, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -4929,8 +4736,6 @@ ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: v_mov_b32_e32 v2, s2 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_min_rtn_i32 v0, v0, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -4970,7 +4775,6 @@ ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB20_2: @@ -5005,7 +4809,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB20_2: @@ -5040,7 +4843,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 5 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB20_2: @@ -5075,8 +4877,6 @@ ; GFX1064-NEXT: v_mov_b32_e32 v0, 5 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -5108,8 +4908,6 @@ ; GFX1032-NEXT: v_mov_b32_e32 v0, 5 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -5143,8 +4941,6 @@ ; GFX1164-NEXT: v_mov_b32_e32 v0, 5 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -5178,8 +4974,6 @@ ; GFX1132-NEXT: ; %bb.1: ; GFX1132-NEXT: v_mov_b32_e32 v0, 5 ; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_min_rtn_i64 v[0:1], v2, v[0:1] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -5254,7 +5048,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_max_rtn_u32 v0, v0, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB21_4: @@ -5298,7 +5091,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_max_rtn_u32 v0, v0, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB21_4: @@ -5341,8 +5133,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_max_rtn_u32 v0, v0, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -5383,8 +5173,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, s2 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_max_rtn_u32 v0, v0, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -5433,8 +5221,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_max_rtn_u32 v0, v0, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -5479,8 +5265,6 @@ ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: v_mov_b32_e32 v2, s2 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_max_rtn_u32 v0, v0, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -5520,7 +5304,6 @@ ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB22_2: @@ -5554,7 +5337,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB22_2: @@ -5587,7 +5369,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 5 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB22_2: @@ -5620,8 +5401,6 @@ ; GFX1064-NEXT: v_mov_b32_e32 v0, 5 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -5653,8 +5432,6 @@ ; GFX1032-NEXT: v_mov_b32_e32 v0, 5 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -5688,8 +5465,6 @@ ; GFX1164-NEXT: v_mov_b32_e32 v0, 5 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -5723,8 +5498,6 @@ ; GFX1132-NEXT: ; %bb.1: ; GFX1132-NEXT: v_mov_b32_e32 v0, 5 ; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_max_rtn_u64 v[0:1], v2, v[0:1] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -5799,7 +5572,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_min_rtn_u32 v0, v0, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB23_4: @@ -5843,7 +5615,6 @@ ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_min_rtn_u32 v0, v0, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB23_4: @@ -5886,8 +5657,6 @@ ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, s4 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_min_rtn_u32 v0, v0, v2 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -5928,8 +5697,6 @@ ; GFX1032-NEXT: ; %bb.3: ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, s2 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_min_rtn_u32 v0, v0, v2 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -5978,8 +5745,6 @@ ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, s4 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_min_rtn_u32 v0, v0, v2 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -6024,8 +5789,6 @@ ; GFX1132-NEXT: ; %bb.3: ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: v_mov_b32_e32 v2, s2 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_min_rtn_u32 v0, v0, v2 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv @@ -6065,7 +5828,6 @@ ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 ; GFX7LESS-NEXT: s_mov_b32 m0, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: .LBB24_2: @@ -6099,7 +5861,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB24_2: @@ -6132,7 +5893,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 5 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB24_2: @@ -6165,8 +5925,6 @@ ; GFX1064-NEXT: v_mov_b32_e32 v0, 5 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1064-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1064-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: buffer_gl0_inv @@ -6198,8 +5956,6 @@ ; GFX1032-NEXT: v_mov_b32_e32 v0, 5 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1032-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1032-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: buffer_gl0_inv @@ -6233,8 +5989,6 @@ ; GFX1164-NEXT: v_mov_b32_e32 v0, 5 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1164-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1164-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: buffer_gl0_inv @@ -6268,8 +6022,6 @@ ; GFX1132-NEXT: ; %bb.1: ; GFX1132-NEXT: v_mov_b32_e32 v0, 5 ; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX1132-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1132-NEXT: ds_min_rtn_u64 v[0:1], v2, v[0:1] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -15,7 +15,6 @@ ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_mov_b32_e32 v4, v3 ; GFX908-NEXT: v_add_f32_e32 v3, v4, v2 -; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol @@ -39,7 +38,6 @@ ; GFX90A-NEXT: v_mov_b32_e32 v5, v3 ; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -57,7 +55,6 @@ ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -74,7 +71,6 @@ ; GFX1100-NEXT: v_mov_b32_e32 v4, v3 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_add_f32_e32 v3, v4, v2 -; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] glc ; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -104,7 +100,6 @@ ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_mov_b32_e32 v4, v3 ; GFX908-NEXT: v_add_f32_e32 v3, v4, v2 -; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 @@ -133,7 +128,6 @@ ; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GFX90A-NEXT: s_cbranch_execz .LBB1_3 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.global -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f32 v3, v[0:1], v2, off glc ; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX90A-NEXT: ; implicit-def: $vgpr2 @@ -157,7 +151,7 @@ ; GFX90A-NEXT: ; %bb.7: ; %atomicrmw.shared ; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: ds_add_rtn_f32 v3, v0, v2 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: .LBB1_8: ; %atomicrmw.phi @@ -208,7 +202,6 @@ ; GFX908-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GFX908-NEXT: s_cbranch_execz .LBB2_5 ; GFX908-NEXT: ; %bb.4: ; %atomicrmw.global -; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: global_atomic_add_f32 v[0:1], v2, off ; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX908-NEXT: ; implicit-def: $vgpr2 @@ -231,7 +224,6 @@ ; GFX908-NEXT: .LBB2_8: ; %atomicrmw.shared ; GFX908-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; GFX908-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc -; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: ds_add_f32 v0, v2 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] @@ -260,7 +252,6 @@ ; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GFX90A-NEXT: s_cbranch_execz .LBB2_5 ; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off ; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX90A-NEXT: ; implicit-def: $vgpr2 @@ -283,7 +274,6 @@ ; GFX90A-NEXT: .LBB2_8: ; %atomicrmw.shared ; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: ds_add_f32 v0, v2 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] @@ -321,7 +311,6 @@ ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_mov_b32_e32 v4, v3 ; GFX908-NEXT: v_add_f32_e32 v3, v4, v2 -; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 @@ -343,7 +332,6 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v5, v3 ; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 @@ -373,7 +361,6 @@ ; GFX1100-NEXT: v_mov_b32_e32 v4, v3 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_add_f32_e32 v3, v4, v2 -; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] glc ; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll @@ -13,7 +13,6 @@ ; GCN-NEXT: v_mov_b32_e32 v2, v1 ; GCN-NEXT: v_not_b32_e32 v1, v2 ; GCN-NEXT: v_or_b32_e32 v1, -5, v1 -; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 @@ -40,7 +39,6 @@ ; GCN-NEXT: v_mov_b32_e32 v3, v2 ; GCN-NEXT: v_not_b32_e32 v2, v3 ; GCN-NEXT: v_or_b32_e32 v2, -5, v2 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_wbinvl1_vol @@ -68,7 +66,6 @@ ; GCN-NEXT: v_mov_b32_e32 v3, v2 ; GCN-NEXT: v_not_b32_e32 v2, v3 ; GCN-NEXT: v_or_b32_e32 v2, -5, v2 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll --- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll +++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll @@ -45,6 +45,7 @@ ; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1] ; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-BACKOFF-NEXT: s_barrier ; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0 ; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) @@ -62,7 +63,6 @@ ; GFX9-NO-BACKOFF-NEXT: flat_load_dword v0, v[0:1] ; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NO-BACKOFF-NEXT: s_barrier -; GFX9-NO-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NO-BACKOFF-NEXT: flat_store_dword v[2:3], v0 ; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NO-BACKOFF-NEXT: s_setpc_b64 s[30:31] @@ -73,7 +73,7 @@ ; GFX9-BACKOFF-NEXT: flat_load_dword v0, v[0:1] ; GFX9-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-BACKOFF-NEXT: s_barrier -; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) ; GFX9-BACKOFF-NEXT: flat_store_dword v[2:3], v0 ; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-BACKOFF-NEXT: s_setpc_b64 s[30:31] @@ -85,8 +85,6 @@ ; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-BACKOFF-NEXT: s_barrier -; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-BACKOFF-NEXT: buffer_gl0_inv ; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0 ; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) @@ -99,8 +97,6 @@ ; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-BACKOFF-NEXT: s_barrier -; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-BACKOFF-NEXT: buffer_gl0_inv ; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0 ; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll --- a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll @@ -56,7 +56,7 @@ ; GCN-LABEL: {{^}}test_global ; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}} ; GCN: flat_store_dword -; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_barrier define amdgpu_kernel void @test_global(ptr addrspace(1) %arg) { bb: diff --git a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll --- a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll +++ b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll @@ -24,7 +24,6 @@ ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_barrier -; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset1:66 ; GCN-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset0:132 offset1:198 ; GCN-NEXT: ds_write2_b64 v3, v[0:1], v[0:1] offset0:8 offset1:74 diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll --- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll @@ -14,7 +14,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -30,7 +29,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -44,7 +42,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -66,7 +63,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -82,7 +78,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -96,7 +91,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v[0:1], v2 offset:4092 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -118,7 +112,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -134,7 +127,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -150,7 +142,6 @@ ; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -172,7 +163,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -191,7 +181,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -208,7 +197,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -238,7 +226,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -258,7 +245,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -276,7 +262,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -303,7 +288,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -326,7 +310,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -347,7 +330,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -372,7 +354,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -386,7 +367,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -400,7 +380,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -419,7 +398,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -436,7 +414,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -453,7 +430,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -480,7 +456,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -498,7 +473,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -516,7 +490,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -540,7 +513,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -561,7 +533,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -582,7 +553,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -608,7 +578,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -624,7 +593,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -638,7 +606,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -660,7 +627,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -679,7 +645,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -696,7 +661,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -726,7 +690,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -746,7 +709,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -764,7 +726,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -791,7 +752,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -814,7 +774,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -835,7 +794,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -860,7 +818,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -874,7 +831,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -888,7 +844,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -907,7 +862,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -924,7 +878,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -941,7 +894,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -968,7 +920,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -986,7 +937,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1004,7 +954,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1028,7 +977,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1049,7 +997,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1070,7 +1017,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1096,7 +1042,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1112,7 +1057,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1126,7 +1070,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1148,7 +1091,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1167,7 +1109,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1184,7 +1125,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1214,7 +1154,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1234,7 +1173,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1252,7 +1190,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1279,7 +1216,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1302,7 +1238,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1323,7 +1258,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1348,7 +1282,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1362,7 +1295,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1376,7 +1308,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1395,7 +1326,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1412,7 +1342,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1429,7 +1358,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1456,7 +1384,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1474,7 +1401,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1492,7 +1418,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1516,7 +1441,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1537,7 +1461,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1558,7 +1481,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1584,7 +1506,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1599,7 +1520,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1612,7 +1532,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smax v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -1633,7 +1552,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -1652,7 +1570,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -1669,7 +1586,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smax v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -1699,7 +1615,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1718,7 +1633,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1735,7 +1649,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smax v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -1761,7 +1674,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -1784,7 +1696,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -1805,7 +1716,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smax v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -1830,7 +1740,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1843,7 +1752,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1856,7 +1764,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smax v[0:1], v2 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -1874,7 +1781,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -1891,7 +1797,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -1908,7 +1813,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -1935,7 +1839,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1952,7 +1855,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1969,7 +1871,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smax v[0:1], v2 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -1992,7 +1893,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -2013,7 +1913,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -2034,7 +1933,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smax v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -2060,7 +1958,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2075,7 +1972,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2088,7 +1984,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umax v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -2109,7 +2004,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -2128,7 +2022,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -2145,7 +2038,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umax v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -2175,7 +2067,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2194,7 +2085,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2211,7 +2101,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umax v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -2237,7 +2126,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -2260,7 +2148,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -2281,7 +2168,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umax v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -2306,7 +2192,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2319,7 +2204,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2332,7 +2216,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umax v[0:1], v2 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -2350,7 +2233,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -2367,7 +2249,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -2384,7 +2265,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -2411,7 +2291,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2428,7 +2307,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2445,7 +2323,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umax v[0:1], v2 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -2468,7 +2345,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -2489,7 +2365,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -2510,7 +2385,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umax v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -2536,7 +2410,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2551,7 +2424,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2564,7 +2436,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smin v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -2585,7 +2456,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -2604,7 +2474,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -2621,7 +2490,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smin v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -2651,7 +2519,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2670,7 +2537,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2687,7 +2553,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smin v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -2713,7 +2578,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -2736,7 +2600,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -2757,7 +2620,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smin v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -2782,7 +2644,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2795,7 +2656,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2808,7 +2668,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smin v[0:1], v2 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -2826,7 +2685,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -2843,7 +2701,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -2860,7 +2717,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -2887,7 +2743,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2904,7 +2759,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2921,7 +2775,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smin v[0:1], v2 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -2944,7 +2797,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -2965,7 +2817,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -2986,7 +2837,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_smin v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -3012,7 +2862,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -3027,7 +2876,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -3040,7 +2888,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umin v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -3061,7 +2908,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -3080,7 +2926,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -3097,7 +2942,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umin v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -3127,7 +2971,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -3146,7 +2989,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -3163,7 +3005,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umin v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -3189,7 +3030,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -3212,7 +3052,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -3233,7 +3072,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umin v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -3258,7 +3096,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -3271,7 +3108,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -3284,7 +3120,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umin v[0:1], v2 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -3302,7 +3137,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -3319,7 +3153,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -3336,7 +3169,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -3363,7 +3195,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin v[0:1], v2 ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -3380,7 +3211,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin v[0:1], v2 ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -3397,7 +3227,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umin v[0:1], v2 ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: s_endpgm @@ -3420,7 +3249,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s6 @@ -3441,7 +3269,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s6 @@ -3462,7 +3289,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: flat_atomic_umin v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s6 @@ -3488,7 +3314,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3504,7 +3329,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3518,7 +3342,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3540,7 +3363,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3559,7 +3381,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3576,7 +3397,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3606,7 +3426,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3626,7 +3445,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3644,7 +3462,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3671,7 +3488,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3694,7 +3510,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3715,7 +3530,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3740,7 +3554,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3754,7 +3567,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3768,7 +3580,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3787,7 +3598,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3804,7 +3614,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3821,7 +3630,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3848,7 +3656,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3866,7 +3673,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3884,7 +3690,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3908,7 +3713,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3929,7 +3733,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3950,7 +3753,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3976,7 +3778,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3992,7 +3793,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4006,7 +3806,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4028,7 +3827,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4044,7 +3842,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4058,7 +3855,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4080,7 +3876,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4099,7 +3894,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4116,7 +3910,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4146,7 +3939,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4166,7 +3958,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4184,7 +3975,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4211,7 +4001,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4234,7 +4023,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4255,7 +4043,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4280,7 +4067,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4294,7 +4080,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4308,7 +4093,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4327,7 +4111,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4344,7 +4127,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4361,7 +4143,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4388,7 +4169,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4406,7 +4186,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4424,7 +4203,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4448,7 +4226,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4469,7 +4246,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4490,7 +4266,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4518,7 +4293,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4534,7 +4308,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4548,7 +4321,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v3, s3 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4571,7 +4343,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4591,7 +4362,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4609,7 +4379,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v3, s3 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4642,7 +4411,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4664,7 +4432,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4684,7 +4451,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v[2:3], v[0:1] offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4713,7 +4479,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s9 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v2, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4738,7 +4503,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s9 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v2, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4761,7 +4525,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v1, s9 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v2, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4787,7 +4550,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4801,7 +4563,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4815,7 +4576,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v3, s3 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4835,7 +4595,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4853,7 +4612,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4871,7 +4629,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v3, s3 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4901,7 +4658,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4921,7 +4677,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4941,7 +4696,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4967,7 +4721,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s9 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v2, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4990,7 +4743,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s9 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v2, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5013,7 +4765,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v1, s9 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v2, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5040,7 +4791,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5056,7 +4806,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5070,7 +4819,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5092,7 +4840,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5111,7 +4858,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5128,7 +4874,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5158,7 +4903,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5178,7 +4922,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5196,7 +4939,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5223,7 +4965,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5246,7 +4987,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5267,7 +5007,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5292,7 +5031,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5306,7 +5044,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5320,7 +5057,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5339,7 +5075,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5356,7 +5091,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5373,7 +5107,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5400,7 +5133,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5418,7 +5150,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5436,7 +5167,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5460,7 +5190,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5481,7 +5210,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5502,7 +5230,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5526,7 +5253,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dword v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5543,7 +5269,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dword v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5558,7 +5283,6 @@ ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_dword v2, v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5580,7 +5304,6 @@ ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dword v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5595,7 +5318,6 @@ ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dword v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5610,7 +5332,6 @@ ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_dword v2, v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5637,7 +5358,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dword v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5658,7 +5378,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dword v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5677,7 +5396,6 @@ ; GCN3-NEXT: s_addc_u32 s1, s5, s1 ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_dword v2, v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5704,7 +5422,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dword v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5723,7 +5440,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dword v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5742,7 +5458,6 @@ ; GCN3-NEXT: s_addc_u32 s1, s5, s1 ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_dword v2, v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5768,7 +5483,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dword v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -5782,7 +5496,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dword v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -5794,7 +5507,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_dword v[0:1], v2 offset:16 ; GCN3-NEXT: s_endpgm entry: @@ -5812,7 +5524,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dword v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -5824,7 +5535,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dword v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -5836,7 +5546,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: @@ -5858,7 +5567,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dword v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -5875,7 +5583,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dword v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -5890,7 +5597,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_dword v[0:1], v2 offset:16 ; GCN3-NEXT: s_endpgm entry: @@ -5912,7 +5618,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dword v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -5927,7 +5632,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dword v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -5942,7 +5646,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: @@ -5960,7 +5663,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dword v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5977,7 +5679,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dword v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5992,7 +5693,6 @@ ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_dword v2, v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6014,7 +5714,6 @@ ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dword v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6029,7 +5728,6 @@ ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dword v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6044,7 +5742,6 @@ ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_dword v2, v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6071,7 +5768,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dword v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6092,7 +5788,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dword v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6111,7 +5806,6 @@ ; GCN3-NEXT: s_addc_u32 s1, s5, s1 ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_dword v2, v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6138,7 +5832,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dword v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6157,7 +5850,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dword v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6176,7 +5868,6 @@ ; GCN3-NEXT: s_addc_u32 s1, s5, s1 ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_dword v2, v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6202,7 +5893,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dword v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6216,7 +5906,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dword v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -6228,7 +5917,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_dword v[0:1], v2 offset:16 ; GCN3-NEXT: s_endpgm entry: @@ -6246,7 +5934,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dword v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6258,7 +5945,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dword v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -6270,7 +5956,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: @@ -6292,7 +5977,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dword v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6309,7 +5993,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dword v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -6324,7 +6007,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_dword v[0:1], v2 offset:16 ; GCN3-NEXT: s_endpgm entry: @@ -6346,7 +6028,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dword v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6361,7 +6042,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dword v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -6376,7 +6056,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: @@ -6394,7 +6073,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_ubyte v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6411,7 +6089,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_ubyte v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6426,7 +6103,6 @@ ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_ubyte v2, v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6448,7 +6124,6 @@ ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_ubyte v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6463,7 +6138,6 @@ ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_ubyte v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6478,7 +6152,6 @@ ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_ubyte v2, v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6504,7 +6177,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_ubyte v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6524,7 +6196,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_ubyte v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6542,7 +6213,6 @@ ; GCN3-NEXT: s_addc_u32 s1, s5, s3 ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_ubyte v2, v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6569,7 +6239,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_byte v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6583,7 +6252,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_byte v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -6595,7 +6263,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_byte v[0:1], v2 offset:16 ; GCN3-NEXT: s_endpgm entry: @@ -6613,7 +6280,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_byte v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6625,7 +6291,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_byte v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -6637,7 +6302,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_byte v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: @@ -6658,7 +6322,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_byte v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6674,7 +6337,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_byte v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -6688,7 +6350,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_byte v[0:1], v2 offset:16 ; GCN3-NEXT: s_endpgm entry: @@ -6707,7 +6368,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6724,7 +6384,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6739,7 +6398,6 @@ ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_ushort v2, v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6761,7 +6419,6 @@ ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6776,7 +6433,6 @@ ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6791,7 +6447,6 @@ ; GCN3-NEXT: s_waitcnt lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_ushort v2, v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6818,7 +6473,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6839,7 +6493,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6858,7 +6511,6 @@ ; GCN3-NEXT: s_addc_u32 s1, s5, s1 ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_load_ushort v2, v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6885,7 +6537,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_short v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6899,7 +6550,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_short v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -6911,7 +6561,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_short v[0:1], v2 offset:16 ; GCN3-NEXT: s_endpgm entry: @@ -6929,7 +6578,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_short v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6941,7 +6589,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_short v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -6953,7 +6600,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_short v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: @@ -6975,7 +6621,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_short v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -6992,7 +6637,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_short v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -7007,7 +6651,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_short v[0:1], v2 offset:16 ; GCN3-NEXT: s_endpgm entry: @@ -7028,7 +6671,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_short v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -7042,7 +6684,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_short v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -7054,7 +6695,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_short v[0:1], v2 offset:16 ; GCN3-NEXT: s_endpgm entry: @@ -7072,7 +6712,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_short v[0:1], v2 ; GCN1-NEXT: s_endpgm ; @@ -7084,7 +6723,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_short v[0:1], v2 ; GCN2-NEXT: s_endpgm ; @@ -7096,7 +6734,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_store_short v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: @@ -7115,7 +6752,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7131,7 +6767,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7145,7 +6780,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7167,7 +6801,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7183,7 +6816,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7197,7 +6829,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v[0:1], v2 offset:4092 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7219,7 +6850,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7235,7 +6865,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7251,7 +6880,6 @@ ; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7273,7 +6901,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7292,7 +6919,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7309,7 +6935,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7339,7 +6964,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7359,7 +6983,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7377,7 +7000,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7404,7 +7026,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7427,7 +7048,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7448,7 +7068,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7473,7 +7092,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7487,7 +7105,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7501,7 +7118,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7520,7 +7136,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7537,7 +7152,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7554,7 +7168,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7581,7 +7194,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7599,7 +7211,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7617,7 +7228,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7641,7 +7251,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7662,7 +7271,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7683,7 +7291,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7709,7 +7316,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7725,7 +7331,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7739,7 +7344,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7761,7 +7365,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7777,7 +7380,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7791,7 +7393,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v[0:1], v2 offset:4092 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7813,7 +7414,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7829,7 +7429,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7845,7 +7444,6 @@ ; GCN3-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 ; GCN3-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7867,7 +7465,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7886,7 +7483,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7903,7 +7499,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7933,7 +7528,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7953,7 +7547,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7971,7 +7564,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7998,7 +7590,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -8021,7 +7612,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -8042,7 +7632,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v2, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -8067,7 +7656,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -8081,7 +7669,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -8095,7 +7682,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s2 ; GCN3-NEXT: v_mov_b32_e32 v1, s3 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -8114,7 +7700,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -8131,7 +7716,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -8148,7 +7732,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -8175,7 +7758,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -8193,7 +7775,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -8211,7 +7792,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -8235,7 +7815,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -8256,7 +7835,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -8277,7 +7855,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s0 ; GCN3-NEXT: v_mov_b32_e32 v1, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v2, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll @@ -41,7 +41,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -52,7 +51,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -104,7 +102,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -115,7 +112,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -140,7 +136,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -152,7 +147,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -164,7 +158,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -182,7 +175,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -196,7 +188,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -208,7 +199,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -225,7 +215,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -237,7 +226,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -249,7 +237,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -267,7 +254,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -281,7 +267,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -293,7 +278,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -341,7 +325,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -352,7 +335,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -404,7 +386,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -415,7 +396,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -440,7 +420,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -452,7 +431,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -464,7 +442,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -482,7 +459,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -496,7 +472,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -508,7 +483,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -525,7 +499,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -537,7 +510,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -549,7 +521,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -567,7 +538,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -581,7 +551,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -593,7 +562,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap v0, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -641,7 +609,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -652,7 +619,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -704,7 +670,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -715,7 +680,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -740,7 +704,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -752,7 +715,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -764,7 +726,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -782,7 +743,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -796,7 +756,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -808,7 +767,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -825,7 +783,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -837,7 +794,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -849,7 +805,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -867,7 +822,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -881,7 +835,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -893,7 +846,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add v0, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -941,7 +893,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -952,7 +903,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1004,7 +954,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1015,7 +964,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1040,7 +988,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1052,7 +999,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1064,7 +1010,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1082,7 +1027,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1096,7 +1040,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1108,7 +1051,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1125,7 +1067,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1137,7 +1078,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1149,7 +1089,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1167,7 +1106,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1181,7 +1119,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1193,7 +1130,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub v0, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1241,7 +1177,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1252,7 +1187,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1304,7 +1238,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1315,7 +1248,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1340,7 +1272,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1352,7 +1283,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1364,7 +1294,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1382,7 +1311,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1396,7 +1324,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1408,7 +1335,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1425,7 +1351,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1437,7 +1362,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1449,7 +1373,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1467,7 +1390,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1481,7 +1403,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1493,7 +1414,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and v0, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1518,7 +1438,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN1-NEXT: v_not_b32_e32 v3, v3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1541,7 +1460,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN2-NEXT: v_not_b32_e32 v3, v3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1564,7 +1482,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN3-NEXT: v_not_b32_e32 v3, v3 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1593,7 +1510,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN1-NEXT: v_not_b32_e32 v3, v3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1618,7 +1534,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN2-NEXT: v_not_b32_e32 v3, v3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1641,7 +1556,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN3-NEXT: v_not_b32_e32 v3, v3 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1670,7 +1584,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, v3 ; GCN1-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN1-NEXT: v_not_b32_e32 v3, v3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1694,7 +1607,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, v3 ; GCN2-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN2-NEXT: v_not_b32_e32 v3, v3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1718,7 +1630,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN3-NEXT: v_not_b32_e32 v3, v3 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1748,7 +1659,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, v0 ; GCN1-NEXT: v_and_b32_e32 v0, v1, v2 ; GCN1-NEXT: v_not_b32_e32 v0, v0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1773,7 +1683,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, v0 ; GCN2-NEXT: v_and_b32_e32 v0, v1, v2 ; GCN2-NEXT: v_not_b32_e32 v0, v0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1796,7 +1705,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_and_b32_e32 v3, v4, v2 ; GCN3-NEXT: v_not_b32_e32 v3, v3 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1828,7 +1736,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 ; GCN1-NEXT: v_not_b32_e32 v0, v0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1855,7 +1762,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 ; GCN2-NEXT: v_not_b32_e32 v0, v0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1882,7 +1788,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_not_b32_e32 v0, v0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1915,7 +1820,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s34 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 ; GCN1-NEXT: v_not_b32_e32 v0, v0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1944,7 +1848,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s34 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 ; GCN2-NEXT: v_not_b32_e32 v0, v0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1971,7 +1874,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_not_b32_e32 v0, v0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2004,7 +1906,6 @@ ; GCN1-NEXT: v_and_b32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 ; GCN1-NEXT: v_not_b32_e32 v0, v0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2031,7 +1932,6 @@ ; GCN2-NEXT: v_and_b32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 ; GCN2-NEXT: v_not_b32_e32 v0, v0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2058,7 +1958,6 @@ ; GCN3-NEXT: v_and_b32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_not_b32_e32 v0, v0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2091,7 +1990,6 @@ ; GCN1-NEXT: v_and_b32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 ; GCN1-NEXT: v_not_b32_e32 v0, v0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2120,7 +2018,6 @@ ; GCN2-NEXT: v_and_b32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 ; GCN2-NEXT: v_not_b32_e32 v0, v0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2147,7 +2044,6 @@ ; GCN3-NEXT: v_and_b32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_not_b32_e32 v0, v0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2201,7 +2097,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2212,7 +2107,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2264,7 +2158,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2275,7 +2168,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2300,7 +2192,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2312,7 +2203,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2324,7 +2214,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2342,7 +2231,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2356,7 +2244,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2368,7 +2255,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2385,7 +2271,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2397,7 +2282,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2409,7 +2293,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2427,7 +2310,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2441,7 +2323,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2453,7 +2334,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or v0, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2501,7 +2381,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2512,7 +2391,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2564,7 +2442,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2575,7 +2452,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2600,7 +2476,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2612,7 +2487,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2624,7 +2498,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2642,7 +2515,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2656,7 +2528,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2668,7 +2539,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2685,7 +2555,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2697,7 +2566,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2709,7 +2577,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2727,7 +2594,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2741,7 +2607,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2753,7 +2618,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor v0, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2777,7 +2641,6 @@ ; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2799,7 +2662,6 @@ ; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2821,7 +2683,6 @@ ; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2849,7 +2710,6 @@ ; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2873,7 +2733,6 @@ ; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2895,7 +2754,6 @@ ; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2923,7 +2781,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v4, v3 ; GCN1-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2946,7 +2803,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v4, v3 ; GCN2-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2969,7 +2825,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2998,7 +2853,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v1, v0 ; GCN1-NEXT: v_max_i32_e32 v0, v1, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3022,7 +2876,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v1, v0 ; GCN2-NEXT: v_max_i32_e32 v0, v1, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3044,7 +2897,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_max_i32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3075,7 +2927,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_i32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3101,7 +2952,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_i32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3127,7 +2977,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_i32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3159,7 +3008,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_i32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3187,7 +3035,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_i32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3213,7 +3060,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_i32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3245,7 +3091,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 ; GCN1-NEXT: v_max_i32_e32 v0, s6, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3271,7 +3116,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 ; GCN2-NEXT: v_max_i32_e32 v0, s6, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3297,7 +3141,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_max_i32_e32 v0, s6, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3329,7 +3172,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s34 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 ; GCN1-NEXT: v_max_i32_e32 v0, s6, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3357,7 +3199,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s34 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 ; GCN2-NEXT: v_max_i32_e32 v0, s6, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3383,7 +3224,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_max_i32_e32 v0, s6, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3421,7 +3261,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_i32_e32 v0, s2, v1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3454,7 +3293,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_i32_e32 v0, s2, v1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3485,7 +3323,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_i32_e32 v0, s2, v1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3527,7 +3364,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_max_i32_e32 v0, s4, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3565,7 +3401,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_max_i32_e32 v0, s4, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3601,7 +3436,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 ; GCN3-NEXT: v_max_i32_e32 v0, s2, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3643,7 +3477,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_i32_e32 v0, s2, v1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3674,7 +3507,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_i32_e32 v0, s2, v1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3705,7 +3537,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_i32_e32 v0, s2, v1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3744,7 +3575,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_max_i32_e32 v0, s4, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3780,7 +3610,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_max_i32_e32 v0, s4, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3816,7 +3645,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 ; GCN3-NEXT: v_max_i32_e32 v0, s2, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3851,7 +3679,6 @@ ; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3873,7 +3700,6 @@ ; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3895,7 +3721,6 @@ ; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3923,7 +3748,6 @@ ; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3947,7 +3771,6 @@ ; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3969,7 +3792,6 @@ ; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3997,7 +3819,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v4, v3 ; GCN1-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4020,7 +3841,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v4, v3 ; GCN2-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4043,7 +3863,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4072,7 +3891,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v1, v0 ; GCN1-NEXT: v_max_u32_e32 v0, v1, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4096,7 +3914,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v1, v0 ; GCN2-NEXT: v_max_u32_e32 v0, v1, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4118,7 +3935,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_max_u32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4149,7 +3965,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_u32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4175,7 +3990,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_u32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4201,7 +4015,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_u32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4233,7 +4046,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_u32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4261,7 +4073,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_u32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4287,7 +4098,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_u32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4319,7 +4129,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 ; GCN1-NEXT: v_max_u32_e32 v0, s6, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4345,7 +4154,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 ; GCN2-NEXT: v_max_u32_e32 v0, s6, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4371,7 +4179,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_max_u32_e32 v0, s6, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4403,7 +4210,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s34 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 ; GCN1-NEXT: v_max_u32_e32 v0, s6, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4431,7 +4237,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s34 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 ; GCN2-NEXT: v_max_u32_e32 v0, s6, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4457,7 +4262,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_max_u32_e32 v0, s6, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4495,7 +4299,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_max_u32_e32 v0, s2, v1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4528,7 +4331,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_max_u32_e32 v0, s2, v1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4559,7 +4361,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_max_u32_e32 v0, s2, v1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4601,7 +4402,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_max_u32_e32 v0, s4, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4639,7 +4439,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_max_u32_e32 v0, s4, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4675,7 +4474,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 ; GCN3-NEXT: v_max_u32_e32 v0, s2, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4719,7 +4517,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_max_u32_e32 v0, s4, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4755,7 +4552,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_max_u32_e32 v0, s4, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4791,7 +4587,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 ; GCN3-NEXT: v_max_u32_e32 v0, s2, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4826,7 +4621,6 @@ ; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4848,7 +4642,6 @@ ; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4870,7 +4663,6 @@ ; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4898,7 +4690,6 @@ ; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4922,7 +4713,6 @@ ; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4944,7 +4734,6 @@ ; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4972,7 +4761,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v4, v3 ; GCN1-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4995,7 +4783,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v4, v3 ; GCN2-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5018,7 +4805,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5047,7 +4833,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v1, v0 ; GCN1-NEXT: v_min_u32_e32 v0, v1, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5071,7 +4856,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v1, v0 ; GCN2-NEXT: v_min_u32_e32 v0, v1, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5093,7 +4877,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_min_u32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5124,7 +4907,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_u32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5150,7 +4932,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_u32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5176,7 +4957,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_u32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5208,7 +4988,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_u32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5236,7 +5015,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_u32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5262,7 +5040,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_u32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5294,7 +5071,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 ; GCN1-NEXT: v_min_u32_e32 v0, s6, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5320,7 +5096,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 ; GCN2-NEXT: v_min_u32_e32 v0, s6, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5346,7 +5121,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_min_u32_e32 v0, s6, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5378,7 +5152,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s34 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 ; GCN1-NEXT: v_min_u32_e32 v0, s6, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5406,7 +5179,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s34 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 ; GCN2-NEXT: v_min_u32_e32 v0, s6, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5432,7 +5204,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_min_u32_e32 v0, s6, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5462,7 +5233,6 @@ ; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5484,7 +5254,6 @@ ; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5506,7 +5275,6 @@ ; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5534,7 +5302,6 @@ ; GCN1-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5558,7 +5325,6 @@ ; GCN2-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5580,7 +5346,6 @@ ; GCN3-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5608,7 +5373,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v4, v3 ; GCN1-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5631,7 +5395,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v4, v3 ; GCN2-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5654,7 +5417,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5683,7 +5445,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v1, v0 ; GCN1-NEXT: v_min_i32_e32 v0, v1, v2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5707,7 +5468,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v1, v0 ; GCN2-NEXT: v_min_i32_e32 v0, v1, v2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5729,7 +5489,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_mov_b32_e32 v4, v3 ; GCN3-NEXT: v_min_i32_e32 v3, v4, v2 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5760,7 +5519,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_i32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5786,7 +5544,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_i32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5812,7 +5569,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_i32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5844,7 +5600,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_i32_e32 v0, s6, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5872,7 +5627,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_i32_e32 v0, s6, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5898,7 +5652,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_i32_e32 v0, s6, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5930,7 +5683,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 ; GCN1-NEXT: v_min_i32_e32 v0, s6, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5956,7 +5708,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 ; GCN2-NEXT: v_min_i32_e32 v0, s6, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5982,7 +5733,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_min_i32_e32 v0, s6, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6014,7 +5764,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s34 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 ; GCN1-NEXT: v_min_i32_e32 v0, s6, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6042,7 +5791,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s34 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 ; GCN2-NEXT: v_min_i32_e32 v0, s6, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6068,7 +5816,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 ; GCN3-NEXT: v_min_i32_e32 v0, s6, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6106,7 +5853,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_i32_e32 v0, s2, v1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6139,7 +5885,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_i32_e32 v0, s2, v1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6170,7 +5915,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_i32_e32 v0, s2, v1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6212,7 +5956,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_min_i32_e32 v0, s4, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6250,7 +5993,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_min_i32_e32 v0, s4, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6286,7 +6028,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 ; GCN3-NEXT: v_min_i32_e32 v0, s2, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6324,7 +6065,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_min_i32_e32 v0, s4, v1 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6351,7 +6091,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_min_i32_e32 v0, s4, v1 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6378,7 +6117,6 @@ ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: v_min_i32_e32 v0, s4, v1 ; GCN3-NEXT: v_mov_b32_e32 v3, s3 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6416,7 +6154,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_min_i32_e32 v0, s4, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6452,7 +6189,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_min_i32_e32 v0, s4, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6488,7 +6224,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v3, s1 ; GCN3-NEXT: v_mov_b32_e32 v2, s0 ; GCN3-NEXT: v_min_i32_e32 v0, s2, v1 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6547,7 +6282,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6558,7 +6292,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6610,7 +6343,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6621,7 +6353,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6646,7 +6377,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6658,7 +6388,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6670,7 +6399,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6688,7 +6416,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6702,7 +6429,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6714,7 +6440,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6731,7 +6456,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6743,7 +6467,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6755,7 +6478,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6773,7 +6495,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6787,7 +6508,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6799,7 +6519,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc v0, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6847,7 +6566,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6858,7 +6576,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6910,7 +6627,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 16, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6921,7 +6637,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6946,7 +6661,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6958,7 +6672,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6970,7 +6683,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v[0:1], v2 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6988,7 +6700,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v[0:1], v2 ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7002,7 +6713,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v[0:1], v2 ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7014,7 +6724,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v[0:1], v2 offset:16 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7031,7 +6740,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s4 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7043,7 +6751,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s4 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7055,7 +6762,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7073,7 +6779,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s34 ; GCN1-NEXT: v_mov_b32_e32 v1, s35 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7087,7 +6792,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s34 ; GCN2-NEXT: v_mov_b32_e32 v1, s35 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7099,7 +6803,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v0, s4 ; GCN3-NEXT: v_mov_b32_e32 v1, s5 ; GCN3-NEXT: v_mov_b32_e32 v2, s6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec v0, v[0:1], v2 offset:16 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll @@ -13,7 +13,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -29,7 +28,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -52,7 +50,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -72,7 +69,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -102,7 +98,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -122,7 +117,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -148,7 +142,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -170,7 +163,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -195,7 +187,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -209,7 +200,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -229,7 +219,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -247,7 +236,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -274,7 +262,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -292,7 +279,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -315,7 +301,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -335,7 +320,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -361,7 +345,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -377,7 +360,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -400,7 +382,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -420,7 +401,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -450,7 +430,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -470,7 +449,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -496,7 +474,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -518,7 +495,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -543,7 +519,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -557,7 +532,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -577,7 +551,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -595,7 +568,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -622,7 +594,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -640,7 +611,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -663,7 +633,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -683,7 +652,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -709,7 +677,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -725,7 +692,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -748,7 +714,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -768,7 +733,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -798,7 +762,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -818,7 +781,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -844,7 +806,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -866,7 +827,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -891,7 +851,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -905,7 +864,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -925,7 +883,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -943,7 +900,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -970,7 +926,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -988,7 +943,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1011,7 +965,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1031,7 +984,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1057,7 +1009,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1072,7 +1023,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1094,7 +1044,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -1114,7 +1063,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -1144,7 +1092,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1163,7 +1110,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1188,7 +1134,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -1210,7 +1155,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -1235,7 +1179,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1248,7 +1191,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1267,7 +1209,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s6 @@ -1285,7 +1226,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s6 @@ -1312,7 +1252,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1329,7 +1268,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1351,7 +1289,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -1371,7 +1308,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -1397,7 +1333,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1412,7 +1347,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1434,7 +1368,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -1454,7 +1387,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -1484,7 +1416,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1503,7 +1434,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1528,7 +1458,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -1550,7 +1479,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -1575,7 +1503,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1588,7 +1515,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1607,7 +1533,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s6 @@ -1625,7 +1550,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s6 @@ -1652,7 +1576,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1669,7 +1592,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1691,7 +1613,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -1711,7 +1632,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -1737,7 +1657,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1752,7 +1671,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1774,7 +1692,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -1794,7 +1711,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -1824,7 +1740,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1843,7 +1758,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1868,7 +1782,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -1890,7 +1803,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -1915,7 +1827,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -1928,7 +1839,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -1947,7 +1857,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s6 @@ -1965,7 +1874,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s6 @@ -1992,7 +1900,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2009,7 +1916,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2031,7 +1937,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -2051,7 +1956,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -2077,7 +1981,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2092,7 +1995,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2114,7 +2016,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -2134,7 +2035,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -2164,7 +2064,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2183,7 +2082,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2208,7 +2106,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -2230,7 +2127,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -2255,7 +2151,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2268,7 +2163,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2287,7 +2181,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s6 @@ -2305,7 +2198,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s6 @@ -2332,7 +2224,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: s_endpgm @@ -2349,7 +2240,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: s_endpgm @@ -2371,7 +2261,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v2, s2 @@ -2391,7 +2280,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v2, s2 @@ -2417,7 +2305,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2433,7 +2320,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2456,7 +2342,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2476,7 +2361,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2506,7 +2390,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2526,7 +2409,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2552,7 +2434,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2574,7 +2455,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2599,7 +2479,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2613,7 +2492,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2633,7 +2511,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2651,7 +2528,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2678,7 +2554,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2696,7 +2571,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2719,7 +2593,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2739,7 +2612,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2765,7 +2637,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2781,7 +2652,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2803,7 +2673,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2819,7 +2688,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2841,7 +2709,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2857,7 +2724,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2880,7 +2746,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2900,7 +2765,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2930,7 +2794,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2950,7 +2813,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2976,7 +2838,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2998,7 +2859,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3023,7 +2883,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3037,7 +2896,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3057,7 +2915,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3075,7 +2932,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3102,7 +2958,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3120,7 +2975,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3143,7 +2997,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3163,7 +3016,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3189,7 +3041,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3205,7 +3056,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3228,7 +3078,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3248,7 +3097,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3278,7 +3126,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3298,7 +3145,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3324,7 +3170,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3346,7 +3191,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3371,7 +3215,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3385,7 +3228,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3405,7 +3247,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3423,7 +3264,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3450,7 +3290,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3468,7 +3307,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3491,7 +3329,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3511,7 +3348,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3535,7 +3371,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3552,7 +3387,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3574,7 +3408,6 @@ ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3589,7 +3422,6 @@ ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3616,7 +3448,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3637,7 +3468,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3664,7 +3494,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3683,7 +3512,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3709,7 +3537,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s3, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN1-NEXT: s_endpgm ; @@ -3723,7 +3550,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s3, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN2-NEXT: s_endpgm entry: @@ -3741,7 +3567,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN1-NEXT: s_endpgm ; @@ -3753,7 +3578,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN2-NEXT: s_endpgm entry: @@ -3776,7 +3600,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN1-NEXT: s_endpgm ; @@ -3794,7 +3617,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN2-NEXT: s_endpgm entry: @@ -3817,7 +3639,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN1-NEXT: s_endpgm ; @@ -3833,7 +3654,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN2-NEXT: s_endpgm entry: @@ -3856,7 +3676,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v4, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3875,7 +3694,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v4, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3900,7 +3718,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v4, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3919,7 +3736,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v4, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3943,7 +3759,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s6 ; GCN1-NEXT: v_mov_b32_e32 v3, s7 ; GCN1-NEXT: v_mov_b32_e32 v4, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3964,7 +3779,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s6 ; GCN2-NEXT: v_mov_b32_e32 v3, s7 ; GCN2-NEXT: v_mov_b32_e32 v4, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3996,7 +3810,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s6 ; GCN1-NEXT: v_mov_b32_e32 v3, s7 ; GCN1-NEXT: v_mov_b32_e32 v4, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4017,7 +3830,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s6 ; GCN2-NEXT: v_mov_b32_e32 v3, s7 ; GCN2-NEXT: v_mov_b32_e32 v4, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4046,7 +3858,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s9 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v4, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4071,7 +3882,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s9 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v4, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4100,7 +3910,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4117,7 +3926,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4138,7 +3946,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s6 ; GCN1-NEXT: v_mov_b32_e32 v3, s7 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4157,7 +3964,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s6 ; GCN2-NEXT: v_mov_b32_e32 v3, s7 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4186,7 +3992,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s6 ; GCN1-NEXT: v_mov_b32_e32 v3, s7 ; GCN1-NEXT: v_mov_b32_e32 v4, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4205,7 +4010,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s6 ; GCN2-NEXT: v_mov_b32_e32 v3, s7 ; GCN2-NEXT: v_mov_b32_e32 v4, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4231,7 +4035,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v4, s2 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4254,7 +4057,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v4, s2 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4279,7 +4081,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4296,7 +4097,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4318,7 +4118,6 @@ ; GCN1-NEXT: s_waitcnt lgkmcnt(0) ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4333,7 +4132,6 @@ ; GCN2-NEXT: s_waitcnt lgkmcnt(0) ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4360,7 +4158,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4381,7 +4178,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4408,7 +4204,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v0, s0 ; GCN1-NEXT: v_mov_b32_e32 v1, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4427,7 +4222,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v0, s0 ; GCN2-NEXT: v_mov_b32_e32 v1, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4453,7 +4247,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s3, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN1-NEXT: s_endpgm ; @@ -4467,7 +4260,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s3, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN2-NEXT: s_endpgm entry: @@ -4485,7 +4277,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN1-NEXT: s_endpgm ; @@ -4497,7 +4288,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN2-NEXT: s_endpgm entry: @@ -4520,7 +4310,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN1-NEXT: s_endpgm ; @@ -4538,7 +4327,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN2-NEXT: s_endpgm entry: @@ -4561,7 +4349,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN1-NEXT: s_endpgm ; @@ -4577,7 +4364,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN2-NEXT: s_endpgm entry: @@ -4597,7 +4383,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4613,7 +4398,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4636,7 +4420,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4656,7 +4439,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4686,7 +4468,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4706,7 +4487,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4732,7 +4512,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4754,7 +4533,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4779,7 +4557,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4793,7 +4570,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4813,7 +4589,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4831,7 +4606,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4858,7 +4632,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4876,7 +4649,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4899,7 +4671,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4919,7 +4690,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4945,7 +4715,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s2 ; GCN1-NEXT: v_mov_b32_e32 v1, s3 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4961,7 +4730,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s2 ; GCN2-NEXT: v_mov_b32_e32 v1, s3 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4984,7 +4752,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5004,7 +4771,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5034,7 +4800,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5054,7 +4819,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5080,7 +4844,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, 0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5102,7 +4865,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, 0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5127,7 +4889,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s2 ; GCN1-NEXT: v_mov_b32_e32 v3, s3 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5141,7 +4902,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s2 ; GCN2-NEXT: v_mov_b32_e32 v3, s3 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5161,7 +4921,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s5 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5179,7 +4938,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s5 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5206,7 +4964,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5224,7 +4981,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5247,7 +5003,6 @@ ; GCN1-NEXT: s_addc_u32 s1, s1, s5 ; GCN1-NEXT: v_mov_b32_e32 v3, s1 ; GCN1-NEXT: v_mov_b32_e32 v2, s0 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5267,7 +5022,6 @@ ; GCN2-NEXT: s_addc_u32 s1, s1, s5 ; GCN2-NEXT: v_mov_b32_e32 v3, s1 ; GCN2-NEXT: v_mov_b32_e32 v2, s0 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll @@ -41,7 +41,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -52,7 +51,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -104,7 +102,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -115,7 +112,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -141,7 +137,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -154,7 +149,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -167,7 +161,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -186,7 +179,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -201,7 +193,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -214,7 +205,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] offset:32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -232,7 +222,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -245,7 +234,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -258,7 +246,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -277,7 +264,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -292,7 +278,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -305,7 +290,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -353,7 +337,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -364,7 +347,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -416,7 +398,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -427,7 +408,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -453,7 +433,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -466,7 +445,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -479,7 +457,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -498,7 +475,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -513,7 +489,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -526,7 +501,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] offset:32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -544,7 +518,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -557,7 +530,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -570,7 +542,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -589,7 +560,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -604,7 +574,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -617,7 +586,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -665,7 +633,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -676,7 +643,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -728,7 +694,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -739,7 +704,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -765,7 +729,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -778,7 +741,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -791,7 +753,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -810,7 +771,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -825,7 +785,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -838,7 +797,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] offset:32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -856,7 +814,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -869,7 +826,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -882,7 +838,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -901,7 +856,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -916,7 +870,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -929,7 +882,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -977,7 +929,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -988,7 +939,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1040,7 +990,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1051,7 +1000,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1077,7 +1025,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1090,7 +1037,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1103,7 +1049,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1122,7 +1067,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1137,7 +1081,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1150,7 +1093,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] offset:32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1168,7 +1110,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1181,7 +1122,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1194,7 +1134,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1213,7 +1152,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1228,7 +1166,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1241,7 +1178,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1289,7 +1225,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1300,7 +1235,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1352,7 +1286,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1363,7 +1296,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1389,7 +1321,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1402,7 +1333,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1415,7 +1345,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1434,7 +1363,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1449,7 +1377,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1462,7 +1389,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] offset:32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1480,7 +1406,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1493,7 +1418,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1506,7 +1430,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1525,7 +1448,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1540,7 +1462,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1553,7 +1474,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1583,7 +1503,6 @@ ; GCN1-NEXT: v_and_b32_e32 v8, v6, v2 ; GCN1-NEXT: v_not_b32_e32 v5, v4 ; GCN1-NEXT: v_not_b32_e32 v4, v8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1612,7 +1531,6 @@ ; GCN2-NEXT: v_and_b32_e32 v8, v6, v2 ; GCN2-NEXT: v_not_b32_e32 v5, v4 ; GCN2-NEXT: v_not_b32_e32 v4, v8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1638,7 +1556,6 @@ ; GCN3-NEXT: v_and_b32_e32 v8, v6, v2 ; GCN3-NEXT: v_not_b32_e32 v5, v4 ; GCN3-NEXT: v_not_b32_e32 v4, v8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1673,7 +1590,6 @@ ; GCN1-NEXT: v_and_b32_e32 v1, v6, v2 ; GCN1-NEXT: v_not_b32_e32 v5, v0 ; GCN1-NEXT: v_not_b32_e32 v4, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1704,7 +1620,6 @@ ; GCN2-NEXT: v_and_b32_e32 v1, v6, v2 ; GCN2-NEXT: v_not_b32_e32 v5, v0 ; GCN2-NEXT: v_not_b32_e32 v4, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1730,7 +1645,6 @@ ; GCN3-NEXT: v_and_b32_e32 v8, v6, v2 ; GCN3-NEXT: v_not_b32_e32 v5, v4 ; GCN3-NEXT: v_not_b32_e32 v4, v8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1766,7 +1680,6 @@ ; GCN1-NEXT: v_and_b32_e32 v8, v6, v2 ; GCN1-NEXT: v_not_b32_e32 v5, v4 ; GCN1-NEXT: v_not_b32_e32 v4, v8 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1797,7 +1710,6 @@ ; GCN2-NEXT: v_and_b32_e32 v8, v6, v2 ; GCN2-NEXT: v_not_b32_e32 v5, v4 ; GCN2-NEXT: v_not_b32_e32 v4, v8 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1825,7 +1737,6 @@ ; GCN3-NEXT: v_and_b32_e32 v8, v6, v2 ; GCN3-NEXT: v_not_b32_e32 v5, v4 ; GCN3-NEXT: v_not_b32_e32 v4, v8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1862,7 +1773,6 @@ ; GCN1-NEXT: v_and_b32_e32 v1, v8, v2 ; GCN1-NEXT: v_not_b32_e32 v7, v0 ; GCN1-NEXT: v_not_b32_e32 v6, v1 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1893,7 +1803,6 @@ ; GCN2-NEXT: v_and_b32_e32 v1, v8, v2 ; GCN2-NEXT: v_not_b32_e32 v7, v0 ; GCN2-NEXT: v_not_b32_e32 v6, v1 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -1919,7 +1828,6 @@ ; GCN3-NEXT: v_and_b32_e32 v8, v6, v2 ; GCN3-NEXT: v_not_b32_e32 v5, v4 ; GCN3-NEXT: v_not_b32_e32 v4, v8 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -1959,7 +1867,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_not_b32_e32 v1, v0 ; GCN1-NEXT: v_not_b32_e32 v0, v6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -1994,7 +1901,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_not_b32_e32 v1, v0 ; GCN2-NEXT: v_not_b32_e32 v0, v6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2024,7 +1930,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_not_b32_e32 v1, v0 ; GCN3-NEXT: v_not_b32_e32 v0, v6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2065,7 +1970,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_not_b32_e32 v1, v0 ; GCN1-NEXT: v_not_b32_e32 v0, v6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2102,7 +2006,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_not_b32_e32 v1, v0 ; GCN2-NEXT: v_not_b32_e32 v0, v6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2132,7 +2035,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_not_b32_e32 v1, v0 ; GCN3-NEXT: v_not_b32_e32 v0, v6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2174,7 +2076,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_not_b32_e32 v1, v0 ; GCN1-NEXT: v_not_b32_e32 v0, v6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2209,7 +2110,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_not_b32_e32 v1, v0 ; GCN2-NEXT: v_not_b32_e32 v0, v6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2239,7 +2139,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_not_b32_e32 v1, v0 ; GCN3-NEXT: v_not_b32_e32 v0, v6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2280,7 +2179,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_not_b32_e32 v1, v0 ; GCN1-NEXT: v_not_b32_e32 v0, v6 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2317,7 +2215,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_not_b32_e32 v1, v0 ; GCN2-NEXT: v_not_b32_e32 v0, v6 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2347,7 +2244,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_not_b32_e32 v1, v0 ; GCN3-NEXT: v_not_b32_e32 v0, v6 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2401,7 +2297,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2412,7 +2307,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2464,7 +2358,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2475,7 +2368,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2501,7 +2393,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2514,7 +2405,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2527,7 +2417,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2546,7 +2435,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2561,7 +2449,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2574,7 +2461,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] offset:32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2592,7 +2478,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2605,7 +2490,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2618,7 +2502,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2637,7 +2520,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2652,7 +2534,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2665,7 +2546,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2713,7 +2593,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2724,7 +2603,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2776,7 +2654,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2787,7 +2664,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2813,7 +2689,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2826,7 +2701,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2839,7 +2713,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2858,7 +2731,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2873,7 +2745,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2886,7 +2757,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] offset:32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2904,7 +2774,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2917,7 +2786,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2930,7 +2798,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -2949,7 +2816,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -2964,7 +2830,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -2977,7 +2842,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3006,7 +2870,6 @@ ; GCN1-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3034,7 +2897,6 @@ ; GCN2-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3059,7 +2921,6 @@ ; GCN3-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3093,7 +2954,6 @@ ; GCN1-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3123,7 +2983,6 @@ ; GCN2-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3148,7 +3007,6 @@ ; GCN3-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3183,7 +3041,6 @@ ; GCN1-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3213,7 +3070,6 @@ ; GCN2-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3240,7 +3096,6 @@ ; GCN3-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3276,7 +3131,6 @@ ; GCN1-NEXT: v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3306,7 +3160,6 @@ ; GCN2-NEXT: v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3331,7 +3184,6 @@ ; GCN3-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3372,7 +3224,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3408,7 +3259,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3439,7 +3289,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3481,7 +3330,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3519,7 +3367,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3550,7 +3397,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3593,7 +3439,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3629,7 +3474,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3660,7 +3504,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3702,7 +3545,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3740,7 +3582,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3771,7 +3612,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3812,7 +3652,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3849,7 +3688,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -3884,7 +3722,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -3929,7 +3766,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -3969,7 +3805,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4007,7 +3842,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4052,7 +3886,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4087,7 +3920,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4122,7 +3954,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4164,7 +3995,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4202,7 +4032,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4240,7 +4069,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4280,7 +4108,6 @@ ; GCN1-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4308,7 +4135,6 @@ ; GCN2-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4333,7 +4159,6 @@ ; GCN3-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4367,7 +4192,6 @@ ; GCN1-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4397,7 +4221,6 @@ ; GCN2-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4422,7 +4245,6 @@ ; GCN3-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4457,7 +4279,6 @@ ; GCN1-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4487,7 +4308,6 @@ ; GCN2-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4514,7 +4334,6 @@ ; GCN3-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4550,7 +4369,6 @@ ; GCN1-NEXT: v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4580,7 +4398,6 @@ ; GCN2-NEXT: v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4605,7 +4422,6 @@ ; GCN3-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4646,7 +4462,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4682,7 +4497,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4713,7 +4527,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4755,7 +4568,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4793,7 +4605,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4824,7 +4635,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4867,7 +4677,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -4903,7 +4712,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -4934,7 +4742,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -4976,7 +4783,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5014,7 +4820,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5045,7 +4850,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5086,7 +4890,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5123,7 +4926,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5158,7 +4960,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5203,7 +5004,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5243,7 +5043,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5281,7 +5080,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5327,7 +5125,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5365,7 +5162,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5403,7 +5199,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5443,7 +5238,6 @@ ; GCN1-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5471,7 +5265,6 @@ ; GCN2-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5496,7 +5289,6 @@ ; GCN3-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5530,7 +5322,6 @@ ; GCN1-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5560,7 +5351,6 @@ ; GCN2-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5585,7 +5375,6 @@ ; GCN3-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5620,7 +5409,6 @@ ; GCN1-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5650,7 +5438,6 @@ ; GCN2-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5677,7 +5464,6 @@ ; GCN3-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5713,7 +5499,6 @@ ; GCN1-NEXT: v_cmp_le_u64_e32 vcc, v[8:9], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5743,7 +5528,6 @@ ; GCN2-NEXT: v_cmp_le_u64_e32 vcc, v[8:9], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5768,7 +5552,6 @@ ; GCN3-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5809,7 +5592,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5845,7 +5627,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5876,7 +5657,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -5918,7 +5698,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -5956,7 +5735,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -5987,7 +5765,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6030,7 +5807,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6066,7 +5842,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6097,7 +5872,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6139,7 +5913,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6177,7 +5950,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6208,7 +5980,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6243,7 +6014,6 @@ ; GCN1-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6271,7 +6041,6 @@ ; GCN2-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6296,7 +6065,6 @@ ; GCN3-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6330,7 +6098,6 @@ ; GCN1-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6360,7 +6127,6 @@ ; GCN2-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6385,7 +6151,6 @@ ; GCN3-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6420,7 +6185,6 @@ ; GCN1-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6450,7 +6214,6 @@ ; GCN2-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6477,7 +6240,6 @@ ; GCN3-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6513,7 +6275,6 @@ ; GCN1-NEXT: v_cmp_le_i64_e32 vcc, v[8:9], v[2:3] ; GCN1-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6543,7 +6304,6 @@ ; GCN2-NEXT: v_cmp_le_i64_e32 vcc, v[8:9], v[2:3] ; GCN2-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6568,7 +6328,6 @@ ; GCN3-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GCN3-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6609,7 +6368,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6645,7 +6403,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6676,7 +6433,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6718,7 +6474,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6756,7 +6511,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6787,7 +6541,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6830,7 +6583,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s5 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6866,7 +6618,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s5 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -6897,7 +6648,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -6939,7 +6689,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v5, s35 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -6977,7 +6726,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v5, s35 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7008,7 +6756,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v5, s5 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7049,7 +6796,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7086,7 +6832,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7121,7 +6866,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7166,7 +6910,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7206,7 +6949,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7244,7 +6986,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7285,7 +7026,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7316,7 +7056,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7347,7 +7086,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7388,7 +7126,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v4, s0 ; GCN1-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN1-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7426,7 +7163,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v4, s0 ; GCN2-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN2-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7464,7 +7200,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v4, s0 ; GCN3-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GCN3-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7523,7 +7258,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7534,7 +7268,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7586,7 +7319,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7597,7 +7329,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7623,7 +7354,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7636,7 +7366,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7649,7 +7378,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7668,7 +7396,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7683,7 +7410,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7696,7 +7422,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] offset:32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7714,7 +7439,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7727,7 +7451,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7740,7 +7463,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7759,7 +7481,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7774,7 +7495,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7787,7 +7507,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7835,7 +7554,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7846,7 +7564,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7898,7 +7615,6 @@ ; GCN1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN1-NEXT: v_add_i32_e32 v0, vcc, 32, v0 ; GCN1-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7909,7 +7625,6 @@ ; GCN2-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN2-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GCN2-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7935,7 +7650,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7948,7 +7662,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -7961,7 +7674,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -7980,7 +7692,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -7995,7 +7706,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -8008,7 +7718,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] offset:32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -8026,7 +7735,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v2, s4 ; GCN1-NEXT: v_mov_b32_e32 v3, s5 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -8039,7 +7747,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v2, s4 ; GCN2-NEXT: v_mov_b32_e32 v3, s5 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -8052,7 +7759,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol @@ -8071,7 +7777,6 @@ ; GCN1-NEXT: v_mov_b32_e32 v0, s6 ; GCN1-NEXT: v_mov_b32_e32 v1, s7 ; GCN1-NEXT: v_mov_b32_e32 v3, s35 -; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN1-NEXT: buffer_wbinvl1_vol @@ -8086,7 +7791,6 @@ ; GCN2-NEXT: v_mov_b32_e32 v0, s6 ; GCN2-NEXT: v_mov_b32_e32 v1, s7 ; GCN2-NEXT: v_mov_b32_e32 v3, s35 -; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN2-NEXT: buffer_wbinvl1_vol @@ -8099,7 +7803,6 @@ ; GCN3-NEXT: v_mov_b32_e32 v1, s7 ; GCN3-NEXT: v_mov_b32_e32 v2, s4 ; GCN3-NEXT: v_mov_b32_e32 v3, s5 -; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] offset:32 glc ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll b/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll --- a/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll +++ b/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll @@ -106,7 +106,7 @@ ; FORCESC0SC1-NEXT: v_mov_b32_e32 v0, 0 ; FORCESC0SC1-NEXT: v_mov_b32_e32 v1, 1.0 ; FORCESC0SC1-NEXT: buffer_wbl2 sc1 -; FORCESC0SC1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; FORCESC0SC1-NEXT: s_waitcnt lgkmcnt(0) ; FORCESC0SC1-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 ; FORCESC0SC1-NEXT: s_endpgm ; @@ -116,7 +116,7 @@ ; NOSC0SC1-NEXT: v_mov_b32_e32 v0, 0 ; NOSC0SC1-NEXT: v_mov_b32_e32 v1, 1.0 ; NOSC0SC1-NEXT: buffer_wbl2 sc1 -; NOSC0SC1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; NOSC0SC1-NEXT: s_waitcnt lgkmcnt(0) ; NOSC0SC1-NEXT: global_store_dword v0, v1, s[0:1] sc1 ; NOSC0SC1-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll --- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll @@ -32,7 +32,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -49,7 +48,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -75,7 +73,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -191,9 +188,8 @@ ; GFX940-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NEXT: v_mov_b32_e32 v1, s1 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: ds_pk_add_bf16 v0, v1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_endpgm %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) @@ -205,9 +201,8 @@ ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_setpc_b64 s[30:31] %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll --- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll @@ -1192,7 +1192,6 @@ ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1211,7 +1210,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1228,7 +1227,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1240,7 +1239,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1264,7 +1263,6 @@ ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1283,7 +1281,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1300,7 +1298,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1312,7 +1310,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1353,7 +1351,6 @@ ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1373,7 +1370,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1389,7 +1385,6 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1400,7 +1395,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1422,7 +1416,6 @@ ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1442,7 +1435,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1503,7 +1495,6 @@ ; GFX90A-NEXT: .LBB52_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1521,7 +1512,7 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1545,7 +1536,6 @@ ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1565,7 +1555,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1583,7 +1572,6 @@ ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1596,7 +1584,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1620,7 +1607,6 @@ ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1641,7 +1627,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1663,7 +1648,6 @@ ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1683,7 +1667,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1699,7 +1682,6 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1710,7 +1692,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1732,7 +1713,6 @@ ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -1753,7 +1733,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 ; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 @@ -1823,7 +1802,6 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -1842,7 +1820,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -2048,7 +2025,6 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, s0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm @@ -2059,7 +2035,6 @@ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: ds_add_f64 v2, v[0:1] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_endpgm @@ -2076,7 +2051,6 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, s0 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm @@ -2087,7 +2061,6 @@ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: ds_add_f64 v2, v[0:1] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_endpgm @@ -2109,7 +2082,6 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[2:3], v[0:1], 4.0 ; GFX90A-NEXT: v_mov_b32_e32 v4, s2 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: ds_cmpst_rtn_b64 v[2:3], v4, v[0:1], v[2:3] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[0:1] @@ -2132,7 +2104,6 @@ ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_add_f64 v[2:3], v[0:1], 4.0 ; GFX940-NEXT: v_mov_b32_e32 v4, s2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: ds_cmpst_rtn_b64 v[2:3], v4, v[0:1], v[2:3] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[0:1] @@ -2153,7 +2124,6 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] @@ -2162,7 +2132,6 @@ ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll --- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll @@ -13,11 +13,10 @@ ; GCN-NEXT: v_mov_b32_e32 v0, 5 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: s_mov_b32 m0, 16 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_nop 0 ; GCN-NEXT: ds_add_u32 v1, v0 offset:12 gds ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 -; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_add_u32 v1, v0 offset:12 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_endpgm @@ -35,11 +34,10 @@ ; GCN-NEXT: v_mov_b32_e32 v0, 5 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: s_mov_b32 m0, 16 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_nop 0 ; GCN-NEXT: ds_add_u32 v1, v0 offset:12 gds ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 -; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_add_u32 v1, v0 offset:140 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_add_u32 v1, v0 offset:12 @@ -65,11 +63,10 @@ ; GCN-NEXT: v_mov_b32_e32 v0, 5 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: s_mov_b32 m0, 32 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_nop 0 ; GCN-NEXT: ds_add_u32 v1, v0 offset:28 gds ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: ds_add_u32 v1, v0 offset:12 gds ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 @@ -87,11 +84,10 @@ ; GCN-NEXT: v_mov_b32_e32 v0, 5 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: s_movk_i32 m0, 0x420 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_nop 0 ; GCN-NEXT: ds_add_u32 v1, v0 offset:1052 gds ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: ds_add_u32 v1, v0 offset:1036 gds ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 @@ -118,7 +114,6 @@ ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s0 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: ds_add_u32 v1, v0 offset:12 gds ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll --- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll @@ -14,7 +14,7 @@ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 4.0 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: global_atomic_add_f32 v0, v1, s[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll --- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll @@ -19,7 +19,6 @@ ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX900-NEXT: v_mov_b32_e32 v2, v1 ; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX900-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol @@ -45,7 +44,6 @@ ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_mov_b32_e32 v2, v1 ; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol @@ -72,7 +70,6 @@ ; GFX90A-NEXT: v_mov_b32_e32 v3, v1 ; GFX90A-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -99,8 +96,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -127,8 +122,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_mov_b32_e32 v2, v1 ; GFX11-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -162,7 +155,6 @@ ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX900-NEXT: v_mov_b32_e32 v2, v1 ; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX900-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol @@ -188,7 +180,6 @@ ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_mov_b32_e32 v2, v1 ; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol @@ -206,7 +197,7 @@ ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f32 v0, v0, v1, s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -226,8 +217,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -245,8 +234,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_add_f32 v0, v0, v1, s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -273,7 +261,6 @@ ; GFX900-NEXT: .LBB2_1: ; %atomicrmw.start ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX900-NEXT: v_add_f32_e32 v0, 4.0, v1 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX900-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol @@ -290,7 +277,7 @@ ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX908-NEXT: v_mov_b32_e32 v0, 0 ; GFX908-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: global_atomic_add_f32 v0, v1, s[0:1] ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol @@ -301,7 +288,7 @@ ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f32 v0, v1, s[0:1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -319,8 +306,6 @@ ; GFX10-NEXT: .LBB2_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_add_f32_e32 v0, 4.0, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -337,8 +322,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_add_f32 v0, v1, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -361,7 +345,6 @@ ; GFX900-NEXT: .LBB3_1: ; %atomicrmw.start ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX900-NEXT: v_add_f32_e32 v0, 4.0, v1 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX900-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol @@ -378,7 +361,7 @@ ; GFX908-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX908-NEXT: v_mov_b32_e32 v0, 0 ; GFX908-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: s_waitcnt lgkmcnt(0) ; GFX908-NEXT: global_atomic_add_f32 v0, v1, s[0:1] ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol @@ -389,7 +372,7 @@ ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f32 v0, v1, s[0:1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -407,8 +390,6 @@ ; GFX10-NEXT: .LBB3_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_add_f32_e32 v0, 4.0, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -425,8 +406,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_add_f32 v0, v1, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -450,7 +430,6 @@ ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX900-NEXT: v_mov_b32_e32 v2, v1 ; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX900-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol @@ -476,7 +455,6 @@ ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_mov_b32_e32 v2, v1 ; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol @@ -494,7 +472,7 @@ ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: global_atomic_add_f32 v0, v0, v1, s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -514,8 +492,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -533,8 +509,7 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_add_f32 v0, v0, v1, s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -562,7 +537,6 @@ ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX900-NEXT: v_mov_b32_e32 v2, v1 ; GFX900-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol @@ -588,7 +562,6 @@ ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_mov_b32_e32 v2, v1 ; GFX908-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol @@ -615,7 +588,6 @@ ; GFX90A-NEXT: v_mov_b32_e32 v3, v1 ; GFX90A-NEXT: v_add_f32_e32 v2, 4.0, v3 ; GFX90A-NEXT: buffer_wbl2 -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_invl2 @@ -642,8 +614,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v2, v1 ; GFX10-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -670,8 +640,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_mov_b32_e32 v2, v1 ; GFX11-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -705,7 +673,6 @@ ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_mov_b32_e32 v2, v1 ; GCN-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_wbinvl1_vol @@ -731,7 +698,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_mov_b32_e32 v2, v1 ; GFX11-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: global_atomic_cmpswap v1, v0, v[1:2], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_wbinvl1_vol @@ -754,7 +720,7 @@ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 4.0 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: global_atomic_add_f32 v0, v1, s[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_wbinvl1_vol @@ -765,7 +731,7 @@ ; GFX11-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_atomic_add_f32 v0, v1, s[0:1] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_wbinvl1_vol @@ -787,7 +753,6 @@ ; GFX900-NEXT: .LBB8_1: ; %atomicrmw.start ; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX900-NEXT: v_add_f32_e32 v0, 4.0, v1 -; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX900-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_wbinvl1_vol @@ -811,7 +776,6 @@ ; GFX908-NEXT: .LBB8_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_add_f32_e32 v0, 4.0, v1 -; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX908-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: buffer_wbinvl1_vol @@ -835,7 +799,6 @@ ; GFX90A-NEXT: .LBB8_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1 -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol @@ -859,8 +822,6 @@ ; GFX10-NEXT: .LBB8_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_add_f32_e32 v0, 4.0, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -885,8 +846,6 @@ ; GFX11-NEXT: .LBB8_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_add_f32_e32 v0, 4.0, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll --- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll @@ -23,7 +23,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -47,8 +46,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -76,8 +73,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -111,7 +106,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -135,8 +129,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -164,8 +156,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -198,7 +188,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -220,8 +209,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -246,8 +233,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -278,7 +263,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -300,8 +284,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -326,8 +308,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_max_i32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -363,7 +343,6 @@ ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[9:10], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -391,8 +370,6 @@ ; GFX10-NEXT: v_cmp_gt_i64_e32 vcc, v[9:10], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -424,8 +401,6 @@ ; GFX11-NEXT: v_cmp_gt_i64_e32 vcc, v[9:10], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -463,7 +438,6 @@ ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[9:10], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -491,8 +465,6 @@ ; GFX10-NEXT: v_cmp_gt_i64_e32 vcc, v[9:10], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -524,8 +496,6 @@ ; GFX11-NEXT: v_cmp_gt_i64_e32 vcc, v[9:10], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -562,7 +532,6 @@ ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[5:6], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -587,8 +556,6 @@ ; GFX10-NEXT: v_cmp_gt_i64_e32 vcc, v[5:6], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -616,8 +583,6 @@ ; GFX11-NEXT: v_cmp_gt_i64_e32 vcc, v[5:6], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -651,7 +616,6 @@ ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[5:6], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -676,8 +640,6 @@ ; GFX10-NEXT: v_cmp_gt_i64_e32 vcc, v[5:6], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -705,8 +667,6 @@ ; GFX11-NEXT: v_cmp_gt_i64_e32 vcc, v[5:6], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -745,7 +705,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -769,8 +728,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -798,8 +755,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -833,7 +788,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -857,8 +811,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -886,8 +838,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -920,7 +870,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -942,8 +891,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -968,8 +915,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1000,7 +945,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1022,8 +966,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1048,8 +990,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_min_i32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1085,7 +1025,6 @@ ; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[9:10], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1113,8 +1052,6 @@ ; GFX10-NEXT: v_cmp_le_i64_e32 vcc, v[9:10], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1146,8 +1083,6 @@ ; GFX11-NEXT: v_cmp_le_i64_e32 vcc, v[9:10], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1185,7 +1120,6 @@ ; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[9:10], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1213,8 +1147,6 @@ ; GFX10-NEXT: v_cmp_le_i64_e32 vcc, v[9:10], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1246,8 +1178,6 @@ ; GFX11-NEXT: v_cmp_le_i64_e32 vcc, v[9:10], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1284,7 +1214,6 @@ ; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[5:6], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1309,8 +1238,6 @@ ; GFX10-NEXT: v_cmp_le_i64_e32 vcc, v[5:6], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1338,8 +1265,6 @@ ; GFX11-NEXT: v_cmp_le_i64_e32 vcc, v[5:6], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1373,7 +1298,6 @@ ; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[5:6], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1398,8 +1322,6 @@ ; GFX10-NEXT: v_cmp_le_i64_e32 vcc, v[5:6], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1427,8 +1349,6 @@ ; GFX11-NEXT: v_cmp_le_i64_e32 vcc, v[5:6], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1467,7 +1387,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1491,8 +1410,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1520,8 +1437,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1555,7 +1470,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1579,8 +1493,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1608,8 +1520,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1642,7 +1552,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1664,8 +1573,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1690,8 +1597,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1722,7 +1627,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1744,8 +1648,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1770,8 +1672,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_max_u32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1807,7 +1707,6 @@ ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[9:10], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1835,8 +1734,6 @@ ; GFX10-NEXT: v_cmp_gt_u64_e32 vcc, v[9:10], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1868,8 +1765,6 @@ ; GFX11-NEXT: v_cmp_gt_u64_e32 vcc, v[9:10], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1907,7 +1802,6 @@ ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[9:10], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1935,8 +1829,6 @@ ; GFX10-NEXT: v_cmp_gt_u64_e32 vcc, v[9:10], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1968,8 +1860,6 @@ ; GFX11-NEXT: v_cmp_gt_u64_e32 vcc, v[9:10], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2006,7 +1896,6 @@ ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[5:6], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2031,8 +1920,6 @@ ; GFX10-NEXT: v_cmp_gt_u64_e32 vcc, v[5:6], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2060,8 +1947,6 @@ ; GFX11-NEXT: v_cmp_gt_u64_e32 vcc, v[5:6], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2095,7 +1980,6 @@ ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[5:6], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2120,8 +2004,6 @@ ; GFX10-NEXT: v_cmp_gt_u64_e32 vcc, v[5:6], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2149,8 +2031,6 @@ ; GFX11-NEXT: v_cmp_gt_u64_e32 vcc, v[5:6], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2189,7 +2069,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2213,8 +2092,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2242,8 +2119,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2277,7 +2152,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2301,8 +2175,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, v0 ; GFX10-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2330,8 +2202,6 @@ ; GFX11-NEXT: v_mov_b32_e32 v5, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2364,7 +2234,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2386,8 +2255,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2412,8 +2279,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2444,7 +2309,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2466,8 +2330,6 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], v[4:5], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2492,8 +2354,6 @@ ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_min_u32_e32 v4, v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2529,7 +2389,6 @@ ; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[9:10], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2557,8 +2416,6 @@ ; GFX10-NEXT: v_cmp_le_u64_e32 vcc, v[9:10], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2590,8 +2447,6 @@ ; GFX11-NEXT: v_cmp_le_u64_e32 vcc, v[9:10], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2629,7 +2484,6 @@ ; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[9:10], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2657,8 +2511,6 @@ ; GFX10-NEXT: v_cmp_le_u64_e32 vcc, v[9:10], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2690,8 +2542,6 @@ ; GFX11-NEXT: v_cmp_le_u64_e32 vcc, v[9:10], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v8, v2, v10, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v7, v1, v9, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[5:6], v[7:10], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2728,7 +2578,6 @@ ; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[5:6], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2753,8 +2602,6 @@ ; GFX10-NEXT: v_cmp_le_u64_e32 vcc, v[5:6], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2782,8 +2629,6 @@ ; GFX11-NEXT: v_cmp_le_u64_e32 vcc, v[5:6], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2817,7 +2662,6 @@ ; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[5:6], v[1:2] ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2842,8 +2686,6 @@ ; GFX10-NEXT: v_cmp_le_u64_e32 vcc, v[5:6], v[1:2] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2871,8 +2713,6 @@ ; GFX11-NEXT: v_cmp_le_u64_e32 vcc, v[5:6], v[1:2] ; GFX11-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc ; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[3:4], v[7:8], v[3:6], off offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll --- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll @@ -8,7 +8,6 @@ define amdgpu_ps void @global_xchg_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xchg_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -16,8 +15,6 @@ ; ; GFX10-LABEL: global_xchg_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -26,8 +23,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -43,7 +38,6 @@ define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_2047(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xchg_saddr_i32_nortn_offset_2047: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] offset:2047 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -51,8 +45,6 @@ ; ; GFX10-LABEL: global_xchg_saddr_i32_nortn_offset_2047: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] offset:2047 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -61,8 +53,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i32_nortn_offset_2047: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:2047 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -79,7 +69,6 @@ define amdgpu_ps void @global_xchg_saddr_i32_nortn_offset_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] offset:-2048 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -87,8 +76,6 @@ ; ; GFX10-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v1, s[2:3] offset:-2048 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -97,8 +84,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i32_nortn_offset_neg2048: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[2:3] offset:-2048 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -114,7 +99,6 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xchg_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -122,8 +106,6 @@ ; ; GFX10-LABEL: global_xchg_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -132,8 +114,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -149,7 +129,6 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_2048(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xchg_saddr_i32_rtn_2048: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:2048 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -161,8 +140,6 @@ ; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s[0:1], s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v2, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc, 0, v3, vcc -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v[2:3], v1, off glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -171,8 +148,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i32_rtn_2048: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:2048 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -189,7 +164,6 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_neg2048(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xchg_saddr_i32_rtn_neg2048: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:-2048 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -197,8 +171,6 @@ ; ; GFX10-LABEL: global_xchg_saddr_i32_rtn_neg2048: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[2:3] offset:-2048 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -207,8 +179,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i32_rtn_neg2048: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[2:3] offset:-2048 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -237,8 +207,7 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_nop 3 +; GFX9-NEXT: s_nop 4 ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -251,8 +220,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -266,8 +233,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s0, v2 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -290,8 +255,7 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_nop 3 +; GFX9-NEXT: s_nop 4 ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -304,8 +268,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -319,8 +281,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s0, v2 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v0, v1, s[0:1] offset:42 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -344,8 +304,7 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_nop 3 +; GFX9-NEXT: s_nop 4 ; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -358,8 +317,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -373,8 +330,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s0, v2 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -396,8 +351,7 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s1, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_nop 3 +; GFX9-NEXT: s_nop 4 ; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -410,8 +364,6 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 ; GFX10-NEXT: v_readfirstlane_b32 s1, v3 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -425,8 +377,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s0, v2 ; GFX11-NEXT: v_readfirstlane_b32 s1, v3 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] offset:42 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -451,7 +401,6 @@ define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_xchg_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -459,8 +408,6 @@ ; ; GFX10-LABEL: global_xchg_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -469,8 +416,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -486,7 +431,6 @@ define amdgpu_ps <2 x float> @global_xchg_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_xchg_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -494,8 +438,6 @@ ; ; GFX10-LABEL: global_xchg_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -504,8 +446,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -522,7 +462,6 @@ define amdgpu_ps void @global_xchg_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_xchg_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -530,8 +469,6 @@ ; ; GFX10-LABEL: global_xchg_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -540,8 +477,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -556,7 +491,6 @@ define amdgpu_ps void @global_xchg_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_xchg_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -564,8 +498,6 @@ ; ; GFX10-LABEL: global_xchg_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_swap_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -574,8 +506,6 @@ ; ; GFX11-LABEL: global_xchg_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_swap_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -595,7 +525,6 @@ define amdgpu_ps float @global_add_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_add_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -603,8 +532,6 @@ ; ; GFX10-LABEL: global_add_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_add v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -613,8 +540,6 @@ ; ; GFX11-LABEL: global_add_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -630,7 +555,6 @@ define amdgpu_ps float @global_add_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_add_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v0, v1, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -638,8 +562,6 @@ ; ; GFX10-LABEL: global_add_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_add v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -648,8 +570,6 @@ ; ; GFX11-LABEL: global_add_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_add_u32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -666,7 +586,6 @@ define amdgpu_ps void @global_add_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_add_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -674,8 +593,6 @@ ; ; GFX10-LABEL: global_add_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_add v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -684,8 +601,6 @@ ; ; GFX11-LABEL: global_add_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_add_u32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -700,7 +615,6 @@ define amdgpu_ps void @global_add_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_add_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -708,8 +622,6 @@ ; ; GFX10-LABEL: global_add_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_add v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -718,8 +630,6 @@ ; ; GFX11-LABEL: global_add_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_add_u32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -735,7 +645,6 @@ define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_add_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -743,8 +652,6 @@ ; ; GFX10-LABEL: global_add_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -753,8 +660,6 @@ ; ; GFX11-LABEL: global_add_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -770,7 +675,6 @@ define amdgpu_ps <2 x float> @global_add_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_add_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -778,8 +682,6 @@ ; ; GFX10-LABEL: global_add_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_add_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -788,8 +690,6 @@ ; ; GFX11-LABEL: global_add_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_add_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -806,7 +706,6 @@ define amdgpu_ps void @global_add_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_add_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -814,8 +713,6 @@ ; ; GFX10-LABEL: global_add_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -824,8 +721,6 @@ ; ; GFX11-LABEL: global_add_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -840,7 +735,6 @@ define amdgpu_ps void @global_add_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_add_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -848,8 +742,6 @@ ; ; GFX10-LABEL: global_add_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_add_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -858,8 +750,6 @@ ; ; GFX11-LABEL: global_add_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_add_u64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -879,7 +769,6 @@ define amdgpu_ps float @global_sub_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_sub_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -887,8 +776,6 @@ ; ; GFX10-LABEL: global_sub_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_sub v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -897,8 +784,6 @@ ; ; GFX11-LABEL: global_sub_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -914,7 +799,6 @@ define amdgpu_ps float @global_sub_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_sub_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v0, v1, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -922,8 +806,6 @@ ; ; GFX10-LABEL: global_sub_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_sub v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -932,8 +814,6 @@ ; ; GFX11-LABEL: global_sub_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_sub_u32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -950,7 +830,6 @@ define amdgpu_ps void @global_sub_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_sub_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -958,8 +837,6 @@ ; ; GFX10-LABEL: global_sub_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_sub v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -968,8 +845,6 @@ ; ; GFX11-LABEL: global_sub_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_sub_u32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -984,7 +859,6 @@ define amdgpu_ps void @global_sub_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_sub_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v1, s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -992,8 +866,6 @@ ; ; GFX10-LABEL: global_sub_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_sub v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1002,8 +874,6 @@ ; ; GFX11-LABEL: global_sub_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_sub_u32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1019,7 +889,6 @@ define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_sub_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1027,8 +896,6 @@ ; ; GFX10-LABEL: global_sub_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1037,8 +904,6 @@ ; ; GFX11-LABEL: global_sub_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1054,7 +919,6 @@ define amdgpu_ps <2 x float> @global_sub_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_sub_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1062,8 +926,6 @@ ; ; GFX10-LABEL: global_sub_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_sub_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1072,8 +934,6 @@ ; ; GFX11-LABEL: global_sub_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_sub_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1090,7 +950,6 @@ define amdgpu_ps void @global_sub_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_sub_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1098,8 +957,6 @@ ; ; GFX10-LABEL: global_sub_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1108,8 +965,6 @@ ; ; GFX11-LABEL: global_sub_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1124,7 +979,6 @@ define amdgpu_ps void @global_sub_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_sub_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1132,8 +986,6 @@ ; ; GFX10-LABEL: global_sub_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_sub_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1142,8 +994,6 @@ ; ; GFX11-LABEL: global_sub_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_sub_u64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1163,7 +1013,6 @@ define amdgpu_ps float @global_and_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_and_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1171,8 +1020,6 @@ ; ; GFX10-LABEL: global_and_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_and v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1181,8 +1028,6 @@ ; ; GFX11-LABEL: global_and_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1198,7 +1043,6 @@ define amdgpu_ps float @global_and_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_and_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v0, v1, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1206,8 +1050,6 @@ ; ; GFX10-LABEL: global_and_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_and v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1216,8 +1058,6 @@ ; ; GFX11-LABEL: global_and_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_and_b32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1234,7 +1074,6 @@ define amdgpu_ps void @global_and_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_and_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1242,8 +1081,6 @@ ; ; GFX10-LABEL: global_and_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_and v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1252,8 +1089,6 @@ ; ; GFX11-LABEL: global_and_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_and_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1268,7 +1103,6 @@ define amdgpu_ps void @global_and_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_and_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v1, s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1276,8 +1110,6 @@ ; ; GFX10-LABEL: global_and_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_and v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1286,8 +1118,6 @@ ; ; GFX11-LABEL: global_and_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_and_b32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1303,7 +1133,6 @@ define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_and_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1311,8 +1140,6 @@ ; ; GFX10-LABEL: global_and_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1321,8 +1148,6 @@ ; ; GFX11-LABEL: global_and_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1338,7 +1163,6 @@ define amdgpu_ps <2 x float> @global_and_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_and_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1346,8 +1170,6 @@ ; ; GFX10-LABEL: global_and_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_and_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1356,8 +1178,6 @@ ; ; GFX11-LABEL: global_and_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_and_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1374,7 +1194,6 @@ define amdgpu_ps void @global_and_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_and_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1382,8 +1201,6 @@ ; ; GFX10-LABEL: global_and_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1392,8 +1209,6 @@ ; ; GFX11-LABEL: global_and_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1408,7 +1223,6 @@ define amdgpu_ps void @global_and_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_and_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1416,8 +1230,6 @@ ; ; GFX10-LABEL: global_and_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_and_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1426,8 +1238,6 @@ ; ; GFX11-LABEL: global_and_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_and_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1447,7 +1257,6 @@ define amdgpu_ps float @global_or_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_or_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1455,8 +1264,6 @@ ; ; GFX10-LABEL: global_or_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_or v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1465,8 +1272,6 @@ ; ; GFX11-LABEL: global_or_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1482,7 +1287,6 @@ define amdgpu_ps float @global_or_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_or_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v0, v1, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1490,8 +1294,6 @@ ; ; GFX10-LABEL: global_or_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_or v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1500,8 +1302,6 @@ ; ; GFX11-LABEL: global_or_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_or_b32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1518,7 +1318,6 @@ define amdgpu_ps void @global_or_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_or_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1526,8 +1325,6 @@ ; ; GFX10-LABEL: global_or_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_or v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1536,8 +1333,6 @@ ; ; GFX11-LABEL: global_or_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_or_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1552,7 +1347,6 @@ define amdgpu_ps void @global_or_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_or_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v1, s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1560,8 +1354,6 @@ ; ; GFX10-LABEL: global_or_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_or v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1570,8 +1362,6 @@ ; ; GFX11-LABEL: global_or_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_or_b32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1587,7 +1377,6 @@ define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_or_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1595,8 +1384,6 @@ ; ; GFX10-LABEL: global_or_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1605,8 +1392,6 @@ ; ; GFX11-LABEL: global_or_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1622,7 +1407,6 @@ define amdgpu_ps <2 x float> @global_or_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_or_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1630,8 +1414,6 @@ ; ; GFX10-LABEL: global_or_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_or_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1640,8 +1422,6 @@ ; ; GFX11-LABEL: global_or_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_or_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1658,7 +1438,6 @@ define amdgpu_ps void @global_or_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_or_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1666,8 +1445,6 @@ ; ; GFX10-LABEL: global_or_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1676,8 +1453,6 @@ ; ; GFX11-LABEL: global_or_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1692,7 +1467,6 @@ define amdgpu_ps void @global_or_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_or_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1700,8 +1474,6 @@ ; ; GFX10-LABEL: global_or_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_or_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1710,8 +1482,6 @@ ; ; GFX11-LABEL: global_or_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_or_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1731,7 +1501,6 @@ define amdgpu_ps float @global_xor_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xor_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1739,8 +1508,6 @@ ; ; GFX10-LABEL: global_xor_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_xor v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1749,8 +1516,6 @@ ; ; GFX11-LABEL: global_xor_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1766,7 +1531,6 @@ define amdgpu_ps float @global_xor_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xor_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v0, v1, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1774,8 +1538,6 @@ ; ; GFX10-LABEL: global_xor_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_xor v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1784,8 +1546,6 @@ ; ; GFX11-LABEL: global_xor_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_xor_b32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1802,7 +1562,6 @@ define amdgpu_ps void @global_xor_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xor_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1810,8 +1569,6 @@ ; ; GFX10-LABEL: global_xor_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_xor v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1820,8 +1577,6 @@ ; ; GFX11-LABEL: global_xor_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_xor_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1836,7 +1591,6 @@ define amdgpu_ps void @global_xor_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_xor_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v1, s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1844,8 +1598,6 @@ ; ; GFX10-LABEL: global_xor_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_xor v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1854,8 +1606,6 @@ ; ; GFX11-LABEL: global_xor_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_xor_b32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1871,7 +1621,6 @@ define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_xor_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1879,8 +1628,6 @@ ; ; GFX10-LABEL: global_xor_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1889,8 +1636,6 @@ ; ; GFX11-LABEL: global_xor_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1906,7 +1651,6 @@ define amdgpu_ps <2 x float> @global_xor_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_xor_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1914,8 +1658,6 @@ ; ; GFX10-LABEL: global_xor_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_xor_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -1924,8 +1666,6 @@ ; ; GFX11-LABEL: global_xor_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_xor_b64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -1942,7 +1682,6 @@ define amdgpu_ps void @global_xor_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_xor_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1950,8 +1689,6 @@ ; ; GFX10-LABEL: global_xor_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1960,8 +1697,6 @@ ; ; GFX11-LABEL: global_xor_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -1976,7 +1711,6 @@ define amdgpu_ps void @global_xor_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_xor_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -1984,8 +1718,6 @@ ; ; GFX10-LABEL: global_xor_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_xor_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -1994,8 +1726,6 @@ ; ; GFX11-LABEL: global_xor_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_xor_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2015,15 +1745,12 @@ define amdgpu_ps float @global_max_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_max_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_max_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smax v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2031,8 +1758,6 @@ ; ; GFX11-LABEL: global_max_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2047,15 +1772,12 @@ define amdgpu_ps float @global_max_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_max_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v0, v0, v1, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_max_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smax v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2063,8 +1785,6 @@ ; ; GFX11-LABEL: global_max_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_i32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2080,14 +1800,11 @@ define amdgpu_ps void @global_max_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_max_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_max_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smax v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2095,8 +1812,6 @@ ; ; GFX11-LABEL: global_max_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_i32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2110,14 +1825,11 @@ define amdgpu_ps void @global_max_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_max_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v0, v1, s[2:3] offset:-128 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_max_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smax v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2125,8 +1837,6 @@ ; ; GFX11-LABEL: global_max_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_i32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2141,15 +1851,12 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_max_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_max_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2157,8 +1864,6 @@ ; ; GFX11-LABEL: global_max_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2173,15 +1878,12 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_max_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_max_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2189,8 +1891,6 @@ ; ; GFX11-LABEL: global_max_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2206,14 +1906,11 @@ define amdgpu_ps void @global_max_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_max_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_max_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2221,8 +1918,6 @@ ; ; GFX11-LABEL: global_max_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_i64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2236,14 +1931,11 @@ define amdgpu_ps void @global_max_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_max_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_max_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smax_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2251,8 +1943,6 @@ ; ; GFX11-LABEL: global_max_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_i64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2271,15 +1961,12 @@ define amdgpu_ps float @global_min_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_min_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_min_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smin v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2287,8 +1974,6 @@ ; ; GFX11-LABEL: global_min_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2303,15 +1988,12 @@ define amdgpu_ps float @global_min_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_min_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v0, v0, v1, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_min_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smin v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2319,8 +2001,6 @@ ; ; GFX11-LABEL: global_min_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_i32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2336,14 +2016,11 @@ define amdgpu_ps void @global_min_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_min_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_min_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smin v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2351,8 +2028,6 @@ ; ; GFX11-LABEL: global_min_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_i32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2366,14 +2041,11 @@ define amdgpu_ps void @global_min_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_min_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v0, v1, s[2:3] offset:-128 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_min_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smin v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2381,8 +2053,6 @@ ; ; GFX11-LABEL: global_min_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_i32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2397,15 +2067,12 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_min_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_min_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2413,8 +2080,6 @@ ; ; GFX11-LABEL: global_min_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2429,15 +2094,12 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_min_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_min_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2445,8 +2107,6 @@ ; ; GFX11-LABEL: global_min_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_i64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2462,14 +2122,11 @@ define amdgpu_ps void @global_min_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_min_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_min_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2477,8 +2134,6 @@ ; ; GFX11-LABEL: global_min_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_i64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2492,14 +2147,11 @@ define amdgpu_ps void @global_min_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_min_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_min_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_smin_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2507,8 +2159,6 @@ ; ; GFX11-LABEL: global_min_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_i64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2527,15 +2177,12 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_umax_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_umax_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umax v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2543,8 +2190,6 @@ ; ; GFX11-LABEL: global_umax_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2559,15 +2204,12 @@ define amdgpu_ps float @global_umax_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_umax_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v0, v0, v1, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_umax_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umax v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2575,8 +2217,6 @@ ; ; GFX11-LABEL: global_umax_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_u32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2592,14 +2232,11 @@ define amdgpu_ps void @global_umax_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_umax_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_umax_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umax v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2607,8 +2244,6 @@ ; ; GFX11-LABEL: global_umax_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_u32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2622,14 +2257,11 @@ define amdgpu_ps void @global_umax_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_umax_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v0, v1, s[2:3] offset:-128 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_umax_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umax v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2637,8 +2269,6 @@ ; ; GFX11-LABEL: global_umax_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_u32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2653,15 +2283,12 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_umax_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_umax_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2669,8 +2296,6 @@ ; ; GFX11-LABEL: global_umax_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2685,15 +2310,12 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_umax_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_umax_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umax_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2701,8 +2323,6 @@ ; ; GFX11-LABEL: global_umax_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2718,14 +2338,11 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_umax_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_umax_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2733,8 +2350,6 @@ ; ; GFX11-LABEL: global_umax_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_u64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2748,14 +2363,11 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_umax_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_umax_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umax_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2763,8 +2375,6 @@ ; ; GFX11-LABEL: global_umax_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_max_u64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2783,15 +2393,12 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_umin_saddr_i32_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v0, v0, v1, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_umin_saddr_i32_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umin v0, v0, v1, s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2799,8 +2406,6 @@ ; ; GFX11-LABEL: global_umin_saddr_i32_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2815,15 +2420,12 @@ define amdgpu_ps float @global_umin_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_umin_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v0, v0, v1, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_umin_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umin v0, v0, v1, s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2831,8 +2433,6 @@ ; ; GFX11-LABEL: global_umin_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_u32 v0, v0, v1, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2848,14 +2448,11 @@ define amdgpu_ps void @global_umin_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_umin_saddr_i32_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_umin_saddr_i32_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umin v0, v1, s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2863,8 +2460,6 @@ ; ; GFX11-LABEL: global_umin_saddr_i32_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_u32 v0, v1, s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2878,14 +2473,11 @@ define amdgpu_ps void @global_umin_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GFX9-LABEL: global_umin_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v0, v1, s[2:3] offset:-128 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_umin_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umin v0, v1, s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2893,8 +2485,6 @@ ; ; GFX11-LABEL: global_umin_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_u32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -2909,15 +2499,12 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_umin_saddr_i64_rtn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_umin_saddr_i64_rtn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2925,8 +2512,6 @@ ; ; GFX11-LABEL: global_umin_saddr_i64_rtn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2941,15 +2526,12 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_umin_saddr_i64_rtn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: global_umin_saddr_i64_rtn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umin_x2 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2957,8 +2539,6 @@ ; ; GFX11-LABEL: global_umin_saddr_i64_rtn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_u64 v[0:1], v0, v[1:2], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2974,14 +2554,11 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_umin_saddr_i64_nortn: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_umin_saddr_i64_nortn: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -2989,8 +2566,6 @@ ; ; GFX11-LABEL: global_umin_saddr_i64_nortn: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_u64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -3004,14 +2579,11 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { ; GFX9-LABEL: global_umin_saddr_i64_nortn_neg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_umin_saddr_i64_nortn_neg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_umin_x2 v0, v[1:2], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -3019,8 +2591,6 @@ ; ; GFX11-LABEL: global_umin_saddr_i64_nortn_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_min_u64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -3040,7 +2610,6 @@ ; GFX9-LABEL: global_cmpxchg_saddr_i32_rtn: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -3049,8 +2618,6 @@ ; GFX10-LABEL: global_cmpxchg_saddr_i32_rtn: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v3, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3060,8 +2627,6 @@ ; GFX11-LABEL: global_cmpxchg_saddr_i32_rtn: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v3, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3079,7 +2644,6 @@ ; GFX9-LABEL: global_cmpxchg_saddr_i32_rtn_neg128: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -3088,8 +2652,6 @@ ; GFX10-LABEL: global_cmpxchg_saddr_i32_rtn_neg128: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v3, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v0, v[2:3], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3099,8 +2661,6 @@ ; GFX11-LABEL: global_cmpxchg_saddr_i32_rtn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v3, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v0, v[2:3], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3119,7 +2679,6 @@ ; GFX9-LABEL: global_cmpxchg_saddr_i32_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -3128,8 +2687,6 @@ ; GFX10-LABEL: global_cmpxchg_saddr_i32_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v3, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -3139,8 +2696,6 @@ ; GFX11-LABEL: global_cmpxchg_saddr_i32_nortn: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v3, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -3156,7 +2711,6 @@ ; GFX9-LABEL: global_cmpxchg_saddr_i32_nortn_neg128: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -3165,8 +2719,6 @@ ; GFX10-LABEL: global_cmpxchg_saddr_i32_nortn_neg128: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v3, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap v0, v[2:3], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -3176,8 +2728,6 @@ ; GFX11-LABEL: global_cmpxchg_saddr_i32_nortn_neg128: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v3, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -3195,7 +2745,6 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v6, v2 ; GFX9-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -3205,8 +2754,6 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v6, v2 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3217,8 +2764,6 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v6, v2 ; GFX11-NEXT: v_mov_b32_e32 v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3237,7 +2782,6 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v6, v2 ; GFX9-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -3247,8 +2791,6 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v6, v2 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -3259,8 +2801,6 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v6, v2 ; GFX11-NEXT: v_mov_b32_e32 v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v[0:1], v0, v[3:6], s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -3280,7 +2820,6 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v6, v2 ; GFX9-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -3290,8 +2829,6 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v6, v2 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -3302,8 +2839,6 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v6, v2 ; GFX11-NEXT: v_mov_b32_e32 v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv @@ -3320,7 +2855,6 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v6, v2 ; GFX9-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] offset:-128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -3330,8 +2864,6 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v6, v2 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_atomic_cmpswap_x2 v0, v[3:6], s[2:3] offset:-128 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_gl0_inv @@ -3342,8 +2874,6 @@ ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v6, v2 ; GFX11-NEXT: v_mov_b32_e32 v5, v1 -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_atomic_cmpswap_b64 v0, v[3:6], s[2:3] offset:-128 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll --- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll @@ -2547,7 +2547,6 @@ define amdgpu_ps float @atomic_global_load_saddr_i32(ptr addrspace(1) inreg %sbase, i32 %voffset) { ; GFX9-LABEL: atomic_global_load_saddr_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v0, v0, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2555,8 +2554,6 @@ ; ; GFX10-LABEL: atomic_global_load_saddr_i32: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2565,8 +2562,6 @@ ; ; GFX11-LABEL: atomic_global_load_saddr_i32: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v0, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2582,7 +2577,6 @@ define amdgpu_ps float @atomic_global_load_saddr_i32_immneg128(ptr addrspace(1) inreg %sbase, i32 %voffset) { ; GFX9-LABEL: atomic_global_load_saddr_i32_immneg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v0, v0, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2590,8 +2584,6 @@ ; ; GFX10-LABEL: atomic_global_load_saddr_i32_immneg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] offset:-128 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2600,8 +2592,6 @@ ; ; GFX11-LABEL: atomic_global_load_saddr_i32_immneg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b32 v0, v0, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2618,7 +2608,6 @@ define amdgpu_ps <2 x float> @atomic_global_load_saddr_i64(ptr addrspace(1) inreg %sbase, i32 %voffset) { ; GFX9-LABEL: atomic_global_load_saddr_i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2626,8 +2615,6 @@ ; ; GFX10-LABEL: atomic_global_load_saddr_i64: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2636,8 +2623,6 @@ ; ; GFX11-LABEL: atomic_global_load_saddr_i64: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v0, s[2:3] glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv @@ -2653,7 +2638,6 @@ define amdgpu_ps <2 x float> @atomic_global_load_saddr_i64_immneg128(ptr addrspace(1) inreg %sbase, i32 %voffset) { ; GFX9-LABEL: atomic_global_load_saddr_i64_immneg128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] offset:-128 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1 @@ -2661,8 +2645,6 @@ ; ; GFX10-LABEL: atomic_global_load_saddr_i64_immneg128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] offset:-128 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_gl0_inv @@ -2671,8 +2653,6 @@ ; ; GFX11-LABEL: atomic_global_load_saddr_i64_immneg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:-128 glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-store.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-store.ll --- a/llvm/test/CodeGen/AMDGPU/global-saddr-store.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-store.ll @@ -1064,23 +1064,13 @@ ; -------------------------------------------------------------------------------- define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { -; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_store_dword v0, v1, s[2:3] -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_store_dword v0, v1, s[2:3] -; GFX10-NEXT: s_endpgm +; GCN-LABEL: atomic_global_store_saddr_i32_zext_vgpr: +; GCN: ; %bb.0: +; GCN-NEXT: global_store_dword v0, v1, s[2:3] +; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1092,23 +1082,13 @@ } define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { -; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_store_dword v0, v1, s[2:3] offset:-128 -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_store_dword v0, v1, s[2:3] offset:-128 -; GFX10-NEXT: s_endpgm +; GCN-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128: +; GCN: ; %bb.0: +; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:-128 +; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1121,23 +1101,13 @@ } define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { -; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] -; GFX10-NEXT: s_endpgm +; GCN-LABEL: atomic_global_store_saddr_i64_zext_vgpr: +; GCN: ; %bb.0: +; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] +; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1149,23 +1119,13 @@ } define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr_offset_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i64 %data) { -; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128 -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128 -; GFX10-NEXT: s_endpgm +; GCN-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128: +; GCN: ; %bb.0: +; GCN-NEXT: global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128 +; GCN-NEXT: s_endpgm ; ; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: global_store_b64 v0, v[1:2], s[2:3] offset:-128 ; GFX11-NEXT: s_nop 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll --- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll @@ -12,7 +12,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -26,7 +25,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_add v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -39,7 +37,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -61,7 +58,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v2, s0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -77,7 +73,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -90,7 +85,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3] offset:-4096 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -111,7 +105,6 @@ ; SI-NEXT: s_mov_b32 s5, 0x8ca0 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v0, off, s[0:3], s5 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -126,7 +119,6 @@ ; VI-NEXT: s_mov_b32 s5, 0x8ca0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_add v0, off, s[0:3], s5 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -139,7 +131,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x8000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3] offset:3232 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -161,7 +152,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, 0xabcd ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v2, s0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -177,7 +167,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -192,7 +181,6 @@ ; GFX9-NEXT: s_add_u32 s0, s2, 0xdeac ; GFX9-NEXT: s_addc_u32 s1, s3, 0xabcd ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -217,7 +205,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -236,7 +223,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_add v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -250,7 +236,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -276,7 +261,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -296,7 +280,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -313,7 +296,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -341,7 +323,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -363,7 +344,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -385,7 +365,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -408,7 +387,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v0, off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -422,7 +400,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_add v0, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -435,7 +412,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -456,7 +432,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v0, off, s[0:3], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -475,7 +450,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_add v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -491,7 +465,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -516,7 +489,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -534,7 +506,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -551,7 +522,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -578,7 +548,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -598,7 +567,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -620,7 +588,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -642,7 +609,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -656,7 +622,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_and v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -669,7 +634,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -693,7 +657,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -712,7 +675,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_and v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -726,7 +688,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -752,7 +713,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -772,7 +732,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -789,7 +748,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -817,7 +775,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -839,7 +796,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -861,7 +817,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -884,7 +839,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v0, off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -898,7 +852,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_and v0, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -911,7 +864,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -932,7 +884,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v0, off, s[0:3], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -951,7 +902,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_and v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -967,7 +917,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -992,7 +941,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1010,7 +958,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1027,7 +974,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1054,7 +1000,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1074,7 +1019,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1096,7 +1040,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1118,7 +1061,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1132,7 +1074,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_sub v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1145,7 +1086,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1169,7 +1109,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1188,7 +1127,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_sub v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1202,7 +1140,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1228,7 +1165,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1248,7 +1184,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1265,7 +1200,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1293,7 +1227,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1315,7 +1248,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1337,7 +1269,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1360,7 +1291,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v0, off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1374,7 +1304,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_sub v0, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1387,7 +1316,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1408,7 +1336,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v0, off, s[0:3], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1427,7 +1354,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_sub v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1443,7 +1369,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1468,7 +1393,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1486,7 +1410,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1503,7 +1426,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1530,7 +1452,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1550,7 +1471,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1572,7 +1492,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1594,7 +1513,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_smax v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1608,7 +1526,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_smax v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1621,7 +1538,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1645,7 +1561,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smax v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -1663,7 +1578,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smax v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -1676,7 +1590,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -1701,7 +1614,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smax v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_endpgm ; @@ -1719,7 +1631,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smax v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -1734,7 +1645,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -1760,7 +1670,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smax v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -1781,7 +1690,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smax v0, v[0:1], v2 glc ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -1802,7 +1710,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -1824,7 +1731,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smax v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -1836,7 +1742,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smax v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; @@ -1847,7 +1752,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -1866,7 +1770,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smax v0, off, s[0:3], 0 glc ; SI-NEXT: s_mov_b32 s0, s6 ; SI-NEXT: s_mov_b32 s1, s7 @@ -1884,7 +1787,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smax v0, off, s[0:3], 0 glc ; VI-NEXT: s_mov_b32 s0, s6 ; VI-NEXT: s_mov_b32 s1, s7 @@ -1899,7 +1801,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -1923,7 +1824,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smax v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -1939,7 +1839,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smax v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -1954,7 +1853,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -1979,7 +1877,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smax v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -1998,7 +1895,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smax v0, v[0:1], v2 glc ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -2019,7 +1915,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -2040,7 +1935,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umax v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_endpgm ; @@ -2052,7 +1946,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umax v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_endpgm ; @@ -2063,7 +1956,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -2085,7 +1977,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umax v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2103,7 +1994,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umax v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2116,7 +2006,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -2141,7 +2030,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umax v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_endpgm ; @@ -2159,7 +2047,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umax v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -2174,7 +2061,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -2200,7 +2086,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umax v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -2221,7 +2106,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umax v0, v[0:1], v2 glc ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -2242,7 +2126,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -2264,7 +2147,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umax v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -2276,7 +2158,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umax v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; @@ -2287,7 +2168,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -2306,7 +2186,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umax v0, off, s[0:3], 0 glc ; SI-NEXT: s_mov_b32 s0, s6 ; SI-NEXT: s_mov_b32 s1, s7 @@ -2324,7 +2203,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umax v0, off, s[0:3], 0 glc ; VI-NEXT: s_mov_b32 s0, s6 ; VI-NEXT: s_mov_b32 s1, s7 @@ -2339,7 +2217,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -2363,7 +2240,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umax v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -2379,7 +2255,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umax v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -2394,7 +2269,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -2419,7 +2293,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umax v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -2438,7 +2311,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umax v0, v[0:1], v2 glc ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -2459,7 +2331,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -2480,7 +2351,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smin v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_endpgm ; @@ -2492,7 +2362,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smin v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_endpgm ; @@ -2503,7 +2372,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -2525,7 +2393,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smin v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2543,7 +2410,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smin v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2556,7 +2422,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -2581,7 +2446,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smin v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_endpgm ; @@ -2599,7 +2463,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smin v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -2614,7 +2477,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -2640,7 +2502,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smin v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -2661,7 +2522,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smin v0, v[0:1], v2 glc ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -2682,7 +2542,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -2704,7 +2563,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smin v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -2716,7 +2574,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smin v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; @@ -2727,7 +2584,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -2746,7 +2602,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smin v0, off, s[0:3], 0 glc ; SI-NEXT: s_mov_b32 s0, s6 ; SI-NEXT: s_mov_b32 s1, s7 @@ -2764,7 +2619,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smin v0, off, s[0:3], 0 glc ; VI-NEXT: s_mov_b32 s0, s6 ; VI-NEXT: s_mov_b32 s1, s7 @@ -2779,7 +2633,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -2803,7 +2656,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smin v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -2819,7 +2671,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smin v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -2834,7 +2685,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -2859,7 +2709,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_smin v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -2878,7 +2727,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smin v0, v[0:1], v2 glc ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -2899,7 +2747,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -2920,7 +2767,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umin v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_endpgm ; @@ -2932,7 +2778,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umin v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_endpgm ; @@ -2943,7 +2788,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -2965,7 +2809,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umin v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2983,7 +2826,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umin v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2996,7 +2838,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -3021,7 +2862,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umin v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_endpgm ; @@ -3039,7 +2879,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umin v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -3054,7 +2893,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -3080,7 +2918,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umin v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -3101,7 +2938,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umin v0, v[0:1], v2 glc ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -3122,7 +2958,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -3144,7 +2979,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umin v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -3156,7 +2990,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umin v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; @@ -3167,7 +3000,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -3186,7 +3018,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umin v0, off, s[0:3], 0 glc ; SI-NEXT: s_mov_b32 s0, s6 ; SI-NEXT: s_mov_b32 s1, s7 @@ -3204,7 +3035,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umin v0, off, s[0:3], 0 glc ; VI-NEXT: s_mov_b32 s0, s6 ; VI-NEXT: s_mov_b32 s1, s7 @@ -3219,7 +3049,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -3243,7 +3072,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umin v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -3259,7 +3087,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umin v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -3274,7 +3101,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -3299,7 +3125,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_atomic_umin v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -3318,7 +3143,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umin v0, v[0:1], v2 glc ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 @@ -3339,7 +3163,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] @@ -3360,7 +3183,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3374,7 +3196,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_or v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3387,7 +3208,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3411,7 +3231,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3430,7 +3249,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_or v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3444,7 +3262,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3470,7 +3287,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3490,7 +3306,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3507,7 +3322,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3535,7 +3349,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3557,7 +3370,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3579,7 +3391,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3602,7 +3413,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v0, off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3616,7 +3426,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_or v0, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3629,7 +3438,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3650,7 +3458,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v0, off, s[0:3], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3669,7 +3476,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_or v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3685,7 +3491,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3710,7 +3515,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3728,7 +3532,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3745,7 +3548,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3772,7 +3574,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3792,7 +3593,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3814,7 +3614,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3836,7 +3635,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3850,7 +3648,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3863,7 +3660,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3883,7 +3679,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3897,7 +3692,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3910,7 +3704,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3934,7 +3727,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3953,7 +3745,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3967,7 +3758,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3993,7 +3783,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4013,7 +3802,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4030,7 +3818,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4058,7 +3845,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4080,7 +3866,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4102,7 +3887,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4125,7 +3909,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4139,7 +3922,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4152,7 +3934,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4173,7 +3954,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4192,7 +3972,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4208,7 +3987,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4233,7 +4011,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4251,7 +4028,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4268,7 +4044,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4295,7 +4070,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4315,7 +4089,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4337,7 +4110,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4361,7 +4133,6 @@ ; SI-NEXT: s_mov_b32 s5, s1 ; SI-NEXT: v_mov_b32_e32 v0, s2 ; SI-NEXT: v_mov_b32_e32 v1, s3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4377,7 +4148,6 @@ ; VI-NEXT: s_mov_b32 s4, s0 ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4390,7 +4160,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4415,7 +4184,6 @@ ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4435,7 +4203,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4450,7 +4217,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4479,7 +4245,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: v_mov_b32_e32 v2, s4 ; SI-NEXT: v_mov_b32_e32 v3, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[0:1], v[2:3], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4501,7 +4266,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4520,7 +4284,6 @@ ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4550,7 +4313,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, s10 ; SI-NEXT: v_mov_b32_e32 v2, s8 ; SI-NEXT: v_mov_b32_e32 v3, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4574,7 +4336,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4598,7 +4359,6 @@ ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v0, s8 ; GFX9-NEXT: v_mov_b32_e32 v1, s9 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4624,7 +4384,6 @@ ; SI-NEXT: s_mov_b32 s5, s1 ; SI-NEXT: v_mov_b32_e32 v0, s2 ; SI-NEXT: v_mov_b32_e32 v1, s3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4640,7 +4399,6 @@ ; VI-NEXT: s_mov_b32 s4, s0 ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4653,7 +4411,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4675,7 +4432,6 @@ ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4695,7 +4451,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4712,7 +4467,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4740,7 +4494,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: v_mov_b32_e32 v2, s4 ; SI-NEXT: v_mov_b32_e32 v3, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[0:1], v[2:3], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4760,7 +4513,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4779,7 +4531,6 @@ ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4808,7 +4559,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, s10 ; SI-NEXT: v_mov_b32_e32 v2, s8 ; SI-NEXT: v_mov_b32_e32 v3, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[0:1], v[2:3], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4830,7 +4580,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4854,7 +4603,6 @@ ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v0, s8 ; GFX9-NEXT: v_mov_b32_e32 v1, s9 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4877,7 +4625,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4891,7 +4638,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_xor v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4904,7 +4650,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4928,7 +4673,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4947,7 +4691,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_xor v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4961,7 +4704,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4987,7 +4729,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5007,7 +4748,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5024,7 +4764,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5052,7 +4791,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5074,7 +4812,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5096,7 +4833,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5119,7 +4855,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v0, off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5133,7 +4868,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_xor v0, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5146,7 +4880,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5167,7 +4900,6 @@ ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v0, off, s[0:3], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5186,7 +4918,6 @@ ; VI-NEXT: s_mov_b32 s0, s4 ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_xor v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5202,7 +4933,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v1, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5227,7 +4957,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5245,7 +4974,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5262,7 +4990,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5289,7 +5016,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5309,7 +5035,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5331,7 +5056,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v1, v0, v1, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5355,7 +5079,6 @@ ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_mov_b32 s2, s6 ; SI-NEXT: s_mov_b32 s3, s7 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5372,7 +5095,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dword v0, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5385,7 +5107,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5410,7 +5132,6 @@ ; SI-NEXT: s_mov_b32 s2, 0 ; SI-NEXT: s_mov_b32 s3, s7 ; SI-NEXT: v_mov_b32_e32 v1, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5428,7 +5149,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, -1 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dword v0, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5441,7 +5161,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] offset:-512 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5465,7 +5185,6 @@ ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_mov_b32 s2, s6 ; SI-NEXT: s_mov_b32 s3, s7 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5482,7 +5201,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dword v0, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5495,7 +5213,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5517,7 +5235,6 @@ ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, s0 ; SI-NEXT: s_mov_b32 s5, s1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5534,7 +5251,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dword v0, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5547,7 +5263,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5573,7 +5289,6 @@ ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5595,7 +5310,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dword v0, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5613,7 +5327,6 @@ ; GFX9-NEXT: s_lshl_b64 s[0:1], s[2:3], 2 ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5641,7 +5354,6 @@ ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5661,7 +5373,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dword v0, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5679,7 +5390,6 @@ ; GFX9-NEXT: s_lshl_b64 s[0:1], s[2:3], 2 ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5706,7 +5416,6 @@ ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5728,7 +5437,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dword v0, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5746,7 +5454,6 @@ ; GFX9-NEXT: s_lshl_b64 s[0:1], s[2:3], 2 ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5769,7 +5476,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_endpgm ; @@ -5783,7 +5489,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -5794,7 +5499,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -5812,7 +5516,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -5824,7 +5527,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -5835,7 +5537,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -5852,7 +5553,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -5864,7 +5564,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -5875,7 +5574,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -5895,7 +5593,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_endpgm ; @@ -5912,7 +5609,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -5926,7 +5622,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -5948,7 +5643,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_endpgm ; @@ -5965,7 +5659,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -5979,7 +5672,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -6002,7 +5694,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s8 ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -6017,7 +5708,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -6031,7 +5721,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -6053,7 +5742,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s8 ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: v_mov_b32_e32 v1, s7 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -6068,7 +5756,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -6082,7 +5769,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -6102,7 +5788,6 @@ ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_mov_b32 s2, s6 ; SI-NEXT: s_mov_b32 s3, s7 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6119,7 +5804,6 @@ ; VI-NEXT: s_mov_b32 s5, s3 ; VI-NEXT: s_mov_b32 s2, s6 ; VI-NEXT: s_mov_b32 s3, s7 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6130,7 +5814,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ubyte v1, v0, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6155,7 +5839,6 @@ ; SI-NEXT: s_mov_b32 s2, 0 ; SI-NEXT: s_mov_b32 s3, s7 ; SI-NEXT: v_mov_b32_e32 v1, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6173,7 +5856,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, -1 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_ubyte v0, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6186,7 +5868,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ubyte v1, v0, s[0:1] offset:-512 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6208,7 +5890,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_endpgm ; @@ -6222,7 +5903,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_byte v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -6233,7 +5913,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_byte v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -6251,7 +5930,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -6263,7 +5941,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_byte v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -6274,7 +5951,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_byte v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -6293,7 +5969,6 @@ ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_mov_b32 s2, s6 ; SI-NEXT: s_mov_b32 s3, s7 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6310,7 +5985,6 @@ ; VI-NEXT: s_mov_b32 s5, s3 ; VI-NEXT: s_mov_b32 s2, s6 ; VI-NEXT: s_mov_b32 s3, s7 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6321,7 +5995,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6346,7 +6020,6 @@ ; SI-NEXT: s_mov_b32 s2, 0 ; SI-NEXT: s_mov_b32 s3, s7 ; SI-NEXT: v_mov_b32_e32 v1, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6364,7 +6037,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, -1 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_ushort v0, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6377,7 +6049,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:-512 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6399,7 +6071,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_endpgm ; @@ -6413,7 +6084,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -6424,7 +6094,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_short v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -6442,7 +6111,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -6454,7 +6122,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -6465,7 +6132,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_short v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -6482,7 +6148,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_endpgm ; @@ -6496,7 +6161,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -6507,7 +6171,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_short v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_endpgm entry: @@ -6525,7 +6188,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -6537,7 +6199,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -6548,7 +6209,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_short v0, v1, s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -6565,7 +6225,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6579,7 +6238,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_inc v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6592,7 +6250,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6614,7 +6271,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v2, s0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6630,7 +6286,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6643,7 +6298,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, s[2:3] offset:-4096 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6664,7 +6318,6 @@ ; SI-NEXT: s_mov_b32 s5, 0x8ca0 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v0, off, s[0:3], s5 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6679,7 +6332,6 @@ ; VI-NEXT: s_mov_b32 s5, 0x8ca0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_inc v0, off, s[0:3], s5 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6692,7 +6344,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x8000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, s[2:3] offset:3232 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6714,7 +6365,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, 0xabcd ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v2, s0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6730,7 +6380,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6745,7 +6394,6 @@ ; GFX9-NEXT: s_add_u32 s0, s2, 0xdeac ; GFX9-NEXT: s_addc_u32 s1, s3, 0xabcd ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6769,7 +6417,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6788,7 +6435,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_inc v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6802,7 +6448,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6828,7 +6473,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6848,7 +6492,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6865,7 +6508,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6893,7 +6535,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6915,7 +6556,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6937,7 +6577,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6960,7 +6599,6 @@ ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v0, off, s[0:3], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6974,7 +6612,6 @@ ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_dec v0, off, s[0:3], 0 offset:16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6987,7 +6624,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, s[2:3] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7009,7 +6645,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v2, s0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7025,7 +6660,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7038,7 +6672,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, s[2:3] offset:-4096 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7059,7 +6692,6 @@ ; SI-NEXT: s_mov_b32 s5, 0x8ca0 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v0, off, s[0:3], s5 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7074,7 +6706,6 @@ ; VI-NEXT: s_mov_b32 s5, 0x8ca0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_dec v0, off, s[0:3], s5 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7087,7 +6718,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x8000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, s[2:3] offset:3232 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7109,7 +6739,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, 0xabcd ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v2, s0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7125,7 +6754,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7140,7 +6768,6 @@ ; GFX9-NEXT: s_add_u32 s0, s2, 0xdeac ; GFX9-NEXT: s_addc_u32 s1, s3, 0xabcd ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7164,7 +6791,6 @@ ; SI-NEXT: s_mov_b32 s6, s2 ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: v_mov_b32_e32 v0, s8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7183,7 +6809,6 @@ ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: v_mov_b32_e32 v0, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_dec v0, off, s[4:7], 0 offset:16 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7197,7 +6822,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v1, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7223,7 +6847,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s6 ; SI-NEXT: v_mov_b32_e32 v0, s4 ; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[0:3], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7243,7 +6866,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7260,7 +6882,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, s[0:1] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7288,7 +6909,6 @@ ; SI-NEXT: v_mov_b32_e32 v2, s2 ; SI-NEXT: v_mov_b32_e32 v0, s8 ; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7310,7 +6930,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7332,7 +6951,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v1, v0, v1, s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll --- a/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll @@ -15,7 +15,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -49,7 +48,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -61,7 +59,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -87,7 +84,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -122,7 +118,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -135,7 +130,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -167,7 +161,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap v1, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -185,7 +179,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -196,7 +189,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -219,7 +211,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap v1, off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -239,7 +231,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -250,7 +241,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[4:5] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -274,7 +264,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -292,7 +282,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -303,7 +292,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -326,7 +314,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -346,7 +334,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -357,7 +344,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -438,7 +424,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -535,7 +520,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -547,7 +531,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -638,7 +621,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -740,7 +722,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -753,7 +734,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -859,7 +839,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap v1, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -877,7 +857,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -888,7 +867,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -989,7 +967,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap v1, off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1009,7 +987,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1020,7 +997,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v1, s[4:5] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1121,7 +1097,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1139,7 +1115,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1150,7 +1125,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1254,7 +1228,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1274,7 +1248,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1285,7 +1258,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1307,7 +1279,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1341,7 +1312,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1353,7 +1323,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1379,7 +1348,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1414,7 +1382,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1427,7 +1394,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1459,7 +1425,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_add v1, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1477,7 +1443,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1488,7 +1453,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1511,7 +1475,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_add v1, off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1531,7 +1495,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1542,7 +1505,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v1, s[4:5] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1566,7 +1528,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1584,7 +1546,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1595,7 +1556,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1618,7 +1578,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_add v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1638,7 +1598,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1649,7 +1608,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add v0, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1671,7 +1629,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1705,7 +1662,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1717,7 +1673,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1743,7 +1698,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1778,7 +1732,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1791,7 +1744,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1823,7 +1775,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_sub v1, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1841,7 +1793,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1852,7 +1803,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1875,7 +1825,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_sub v1, off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1895,7 +1845,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1906,7 +1855,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v1, s[4:5] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1930,7 +1878,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_sub v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1948,7 +1896,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1959,7 +1906,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1982,7 +1928,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_sub v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2002,7 +1948,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2013,7 +1958,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub v0, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2035,7 +1979,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2069,7 +2012,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2081,7 +2023,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2107,7 +2048,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2142,7 +2082,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2155,7 +2094,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2187,7 +2125,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_and v1, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2205,7 +2143,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2216,7 +2153,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2239,7 +2175,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_and v1, off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2259,7 +2195,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2270,7 +2205,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v1, s[4:5] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2294,7 +2228,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_and v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2312,7 +2246,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2323,7 +2256,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2346,7 +2278,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_and v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2366,7 +2298,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2377,7 +2308,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and v0, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2409,7 +2339,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2433,7 +2362,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_and_b32_e32 v3, v4, v2 ; VI-NEXT: v_not_b32_e32 v3, v3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2456,7 +2384,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v3, v4, v2 ; GFX9-NEXT: v_not_b32_e32 v3, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2490,7 +2417,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2516,7 +2442,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_and_b32_e32 v3, v4, v2 ; VI-NEXT: v_not_b32_e32 v3, v3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2539,7 +2464,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v3, v4, v2 ; GFX9-NEXT: v_not_b32_e32 v3, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2575,7 +2499,6 @@ ; SI-NEXT: v_not_b32_e32 v4, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2600,7 +2523,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, v3 ; VI-NEXT: v_and_b32_e32 v3, v4, v2 ; VI-NEXT: v_not_b32_e32 v3, v3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2624,7 +2546,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_and_b32_e32 v3, v4, v2 ; GFX9-NEXT: v_not_b32_e32 v3, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2659,7 +2580,6 @@ ; SI-NEXT: v_not_b32_e32 v4, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2686,7 +2606,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, v0 ; VI-NEXT: v_and_b32_e32 v0, v1, v2 ; VI-NEXT: v_not_b32_e32 v0, v0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2709,7 +2628,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_and_b32_e32 v3, v4, v2 ; GFX9-NEXT: v_not_b32_e32 v3, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2749,7 +2667,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2782,7 +2699,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: v_not_b32_e32 v0, v0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2806,7 +2722,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v0, s6, v1 ; GFX9-NEXT: v_not_b32_e32 v0, v0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2845,7 +2760,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2880,7 +2794,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s34 ; VI-NEXT: v_mov_b32_e32 v3, s35 ; VI-NEXT: v_not_b32_e32 v0, v0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2904,7 +2817,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v0, s6, v1 ; GFX9-NEXT: v_not_b32_e32 v0, v0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2945,7 +2857,6 @@ ; SI-NEXT: v_not_b32_e32 v3, v0 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2979,7 +2890,6 @@ ; VI-NEXT: v_and_b32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: v_not_b32_e32 v0, v0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3003,7 +2913,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_and_b32_e32 v0, s6, v3 ; GFX9-NEXT: v_not_b32_e32 v2, v0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3042,7 +2951,6 @@ ; SI-NEXT: v_not_b32_e32 v3, v0 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3078,7 +2986,6 @@ ; VI-NEXT: v_and_b32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s35 ; VI-NEXT: v_not_b32_e32 v0, v0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3102,7 +3009,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_and_b32_e32 v0, s6, v3 ; GFX9-NEXT: v_not_b32_e32 v2, v0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3130,7 +3036,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3164,7 +3069,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3176,7 +3080,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3202,7 +3105,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3237,7 +3139,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3250,7 +3151,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3282,7 +3182,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_or v1, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3300,7 +3200,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3311,7 +3210,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3334,7 +3232,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_or v1, off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3354,7 +3252,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3365,7 +3262,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v1, s[4:5] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3389,7 +3285,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_or v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3407,7 +3303,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3418,7 +3313,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3441,7 +3335,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_or v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3461,7 +3355,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3472,7 +3365,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or v0, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3494,7 +3386,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3528,7 +3419,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3540,7 +3430,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3566,7 +3455,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3601,7 +3489,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3614,7 +3501,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3646,7 +3532,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_xor v1, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3664,7 +3550,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3675,7 +3560,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3698,7 +3582,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_xor v1, off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3718,7 +3602,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3729,7 +3612,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v1, s[4:5] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3753,7 +3635,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_xor v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3771,7 +3653,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3782,7 +3663,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3805,7 +3685,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_xor v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3825,7 +3705,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3836,7 +3715,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor v0, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3867,7 +3745,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3890,7 +3767,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_i32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3912,7 +3788,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_i32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3945,7 +3820,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3970,7 +3844,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_i32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3992,7 +3865,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_i32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4027,7 +3899,6 @@ ; SI-NEXT: v_max_i32_e32 v4, v5, v2 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4051,7 +3922,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v4, v3 ; VI-NEXT: v_max_i32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4074,7 +3944,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_max_i32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4108,7 +3977,6 @@ ; SI-NEXT: v_max_i32_e32 v4, v5, v2 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4134,7 +4002,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, v0 ; VI-NEXT: v_max_i32_e32 v0, v1, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4156,7 +4023,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_max_i32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4195,7 +4061,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4227,7 +4092,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_i32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4250,7 +4114,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_i32_e32 v0, s6, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4288,7 +4151,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4322,7 +4184,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_i32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4345,7 +4206,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_i32_e32 v0, s6, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4385,7 +4245,6 @@ ; SI-NEXT: v_max_i32_e32 v3, s34, v4 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4418,7 +4277,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: v_max_i32_e32 v0, s6, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4441,7 +4299,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_max_i32_e32 v2, s6, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4479,7 +4336,6 @@ ; SI-NEXT: v_max_i32_e32 v3, s34, v4 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4514,7 +4370,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s34 ; VI-NEXT: v_mov_b32_e32 v3, s35 ; VI-NEXT: v_max_i32_e32 v0, s6, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4537,7 +4392,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_max_i32_e32 v2, s6, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4575,7 +4429,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4607,7 +4460,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_max_i32_e32 v0, s2, v1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4636,7 +4488,6 @@ ; GFX9-NEXT: .LBB72_1: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_max_i32_e32 v0, s2, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4677,7 +4528,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4717,7 +4567,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: v_max_i32_e32 v0, s4, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4751,7 +4600,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_max_i32_e32 v2, s2, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4794,7 +4642,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4824,7 +4671,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_max_i32_e32 v0, s2, v1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4853,7 +4699,6 @@ ; GFX9-NEXT: .LBB74_1: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_max_i32_e32 v0, s2, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4893,7 +4738,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4931,7 +4775,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: v_max_i32_e32 v0, s4, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4965,7 +4808,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_max_i32_e32 v2, s2, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5006,7 +4848,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5029,7 +4870,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_u32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5051,7 +4891,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_u32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5084,7 +4923,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5109,7 +4947,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_u32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5131,7 +4968,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_u32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5166,7 +5002,6 @@ ; SI-NEXT: v_max_u32_e32 v4, v5, v2 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5190,7 +5025,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v4, v3 ; VI-NEXT: v_max_u32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5213,7 +5047,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_max_u32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5247,7 +5080,6 @@ ; SI-NEXT: v_max_u32_e32 v4, v5, v2 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5273,7 +5105,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, v0 ; VI-NEXT: v_max_u32_e32 v0, v1, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5295,7 +5126,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_max_u32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5334,7 +5164,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5366,7 +5195,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_u32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5389,7 +5217,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_u32_e32 v0, s6, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5427,7 +5254,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5461,7 +5287,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_u32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5484,7 +5309,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_u32_e32 v0, s6, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5524,7 +5348,6 @@ ; SI-NEXT: v_max_u32_e32 v3, s34, v4 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5557,7 +5380,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: v_max_u32_e32 v0, s6, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5580,7 +5402,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_max_u32_e32 v2, s6, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5618,7 +5439,6 @@ ; SI-NEXT: v_max_u32_e32 v3, s34, v4 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5653,7 +5473,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s34 ; VI-NEXT: v_mov_b32_e32 v3, s35 ; VI-NEXT: v_max_u32_e32 v0, s6, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5676,7 +5495,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_max_u32_e32 v2, s6, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5714,7 +5532,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5746,7 +5563,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_max_u32_e32 v0, s2, v1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5775,7 +5591,6 @@ ; GFX9-NEXT: .LBB84_1: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_max_u32_e32 v0, s2, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5816,7 +5631,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5856,7 +5670,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: v_max_u32_e32 v0, s4, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5890,7 +5703,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_max_u32_e32 v2, s2, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5934,7 +5746,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5972,7 +5783,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: v_max_u32_e32 v0, s4, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6006,7 +5816,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_max_u32_e32 v2, s2, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6047,7 +5856,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6070,7 +5878,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_min_u32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6092,7 +5899,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_u32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6125,7 +5931,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6150,7 +5955,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_min_u32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6172,7 +5976,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_u32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6207,7 +6010,6 @@ ; SI-NEXT: v_min_u32_e32 v4, v5, v2 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6231,7 +6033,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v4, v3 ; VI-NEXT: v_min_u32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6254,7 +6055,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_min_u32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6288,7 +6088,6 @@ ; SI-NEXT: v_min_u32_e32 v4, v5, v2 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6314,7 +6113,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, v0 ; VI-NEXT: v_min_u32_e32 v0, v1, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6336,7 +6134,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_min_u32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6375,7 +6172,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6407,7 +6203,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_min_u32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6430,7 +6225,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_u32_e32 v0, s6, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6468,7 +6262,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6502,7 +6295,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_min_u32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6525,7 +6317,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_u32_e32 v0, s6, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6565,7 +6356,6 @@ ; SI-NEXT: v_min_u32_e32 v3, s34, v4 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6598,7 +6388,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: v_min_u32_e32 v0, s6, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6621,7 +6410,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_min_u32_e32 v2, s6, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6659,7 +6447,6 @@ ; SI-NEXT: v_min_u32_e32 v3, s34, v4 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6694,7 +6481,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s34 ; VI-NEXT: v_mov_b32_e32 v3, s35 ; VI-NEXT: v_min_u32_e32 v0, s6, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6717,7 +6503,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_min_u32_e32 v2, s6, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6754,7 +6539,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6777,7 +6561,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_min_i32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6799,7 +6582,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_i32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6832,7 +6614,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v6, v4 ; SI-NEXT: v_mov_b32_e32 v5, v3 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[5:6], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6857,7 +6638,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_min_i32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6879,7 +6659,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_i32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6914,7 +6693,6 @@ ; SI-NEXT: v_min_i32_e32 v4, v5, v2 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6938,7 +6716,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v4, v3 ; VI-NEXT: v_min_i32_e32 v3, v4, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6961,7 +6738,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_min_i32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6995,7 +6771,6 @@ ; SI-NEXT: v_min_i32_e32 v4, v5, v2 ; SI-NEXT: v_mov_b32_e32 v3, v4 ; SI-NEXT: v_mov_b32_e32 v4, v5 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7021,7 +6796,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, v0 ; VI-NEXT: v_min_i32_e32 v0, v1, v2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[3:4], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7043,7 +6817,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, v3 ; GFX9-NEXT: v_min_i32_e32 v3, v4, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7082,7 +6855,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7114,7 +6886,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_min_i32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7137,7 +6908,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_i32_e32 v0, s6, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7175,7 +6945,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v4, v2 ; SI-NEXT: v_mov_b32_e32 v3, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[3:4], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7209,7 +6978,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_min_i32_e32 v0, s6, v1 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7232,7 +7000,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_min_i32_e32 v0, s6, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7272,7 +7039,6 @@ ; SI-NEXT: v_min_i32_e32 v3, s34, v4 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7305,7 +7071,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: v_min_i32_e32 v0, s6, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7328,7 +7093,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_min_i32_e32 v2, s6, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7366,7 +7130,6 @@ ; SI-NEXT: v_min_i32_e32 v3, s34, v4 ; SI-NEXT: v_mov_b32_e32 v2, v3 ; SI-NEXT: v_mov_b32_e32 v3, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7401,7 +7164,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s34 ; VI-NEXT: v_mov_b32_e32 v3, s35 ; VI-NEXT: v_min_i32_e32 v0, s6, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7424,7 +7186,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_min_i32_e32 v2, s6, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7462,7 +7223,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7494,7 +7254,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_min_i32_e32 v0, s2, v1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7523,7 +7282,6 @@ ; GFX9-NEXT: .LBB103_1: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_min_i32_e32 v0, s2, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7564,7 +7322,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7604,7 +7361,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: v_min_i32_e32 v0, s4, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7638,7 +7394,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_min_i32_e32 v2, s2, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7677,7 +7432,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7703,7 +7457,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_min_i32_e32 v0, s4, v1 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7728,7 +7481,6 @@ ; GFX9-NEXT: .LBB105_1: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_min_i32_e32 v0, s4, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7767,7 +7519,6 @@ ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: v_mov_b32_e32 v2, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap v[2:3], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7805,7 +7556,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: v_min_i32_e32 v0, s4, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap v0, v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7839,7 +7589,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_min_i32_e32 v2, s2, v3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap v0, v1, v[2:3], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7871,7 +7620,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7905,7 +7653,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7917,7 +7664,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7943,7 +7689,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7978,7 +7723,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7991,7 +7735,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8023,7 +7766,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_inc v1, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8041,7 +7784,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8052,7 +7794,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8075,7 +7816,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_inc v1, off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8095,7 +7836,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8106,7 +7846,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v1, s[4:5] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8130,7 +7869,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_inc v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8148,7 +7887,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8159,7 +7897,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8182,7 +7919,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_inc v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8202,7 +7939,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8213,7 +7949,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc v0, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8235,7 +7970,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8269,7 +8003,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[4:7], 0 addr64 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8281,7 +8014,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8307,7 +8039,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8342,7 +8073,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec v2, v[0:1], s[4:7], 0 addr64 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8355,7 +8085,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8387,7 +8116,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_dec v1, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8405,7 +8134,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8416,7 +8144,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8439,7 +8166,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_dec v1, off, s[4:7], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8459,7 +8186,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8470,7 +8196,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v1, s[4:5] offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8494,7 +8219,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_dec v0, off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8512,7 +8237,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8523,7 +8247,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v0, v1, s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8546,7 +8269,7 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_dec v0, off, s[4:7], 0 offset:16 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8566,7 +8289,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s34 ; VI-NEXT: v_mov_b32_e32 v1, s35 ; VI-NEXT: v_mov_b32_e32 v2, s6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8577,7 +8299,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec v0, v0, v1, s[4:5] offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll --- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll @@ -12,7 +12,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -26,7 +25,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -39,7 +37,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -64,7 +61,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -84,7 +80,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -99,7 +94,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -125,7 +119,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_add_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -145,7 +138,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -162,7 +154,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -190,7 +181,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_add_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -210,7 +200,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -231,7 +220,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -256,7 +244,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -272,7 +259,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -285,7 +271,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -307,7 +292,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -327,7 +311,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -344,7 +327,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -369,7 +351,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_add_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -387,7 +368,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -404,7 +384,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -431,7 +410,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_add_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -449,7 +427,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -470,7 +447,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -492,7 +468,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -506,7 +481,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -519,7 +493,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -544,7 +517,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -564,7 +536,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -579,7 +550,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -605,7 +575,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_and_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -625,7 +594,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -642,7 +610,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -670,7 +637,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_and_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -690,7 +656,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -711,7 +676,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -736,7 +700,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -752,7 +715,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -765,7 +727,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -787,7 +748,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -807,7 +767,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -824,7 +783,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -849,7 +807,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_and_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -867,7 +824,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -884,7 +840,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -911,7 +866,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_and_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -929,7 +883,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -950,7 +903,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -972,7 +924,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -986,7 +937,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -999,7 +949,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1024,7 +973,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1044,7 +992,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1059,7 +1006,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1085,7 +1031,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_sub_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1105,7 +1050,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1122,7 +1066,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1150,7 +1093,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_sub_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1170,7 +1112,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1191,7 +1132,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1216,7 +1156,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1232,7 +1171,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1245,7 +1183,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1267,7 +1204,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1287,7 +1223,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1304,7 +1239,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1329,7 +1263,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_sub_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1347,7 +1280,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1364,7 +1296,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1391,7 +1322,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_sub_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -1409,7 +1339,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1430,7 +1359,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1452,7 +1380,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smax_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_endpgm ; @@ -1464,7 +1391,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smax_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_endpgm ; @@ -1475,7 +1401,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -1498,7 +1423,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smax_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -1517,7 +1441,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smax_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -1531,7 +1454,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] @@ -1556,7 +1478,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smax_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_endpgm ; @@ -1574,7 +1495,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smax_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -1589,7 +1509,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -1615,7 +1534,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smax_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 @@ -1634,7 +1552,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -1654,7 +1571,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -1678,7 +1594,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smax_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_endpgm ; @@ -1692,7 +1607,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smax_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm ; @@ -1703,7 +1617,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -1723,7 +1636,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smax_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_mov_b32 s0, s6 ; CI-NEXT: s_mov_b32 s1, s7 @@ -1742,7 +1654,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smax_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_mov_b32 s0, s6 ; VI-NEXT: s_mov_b32 s1, s7 @@ -1758,7 +1669,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] @@ -1782,7 +1692,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smax_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_endpgm ; @@ -1798,7 +1707,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smax_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -1813,7 +1721,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -1838,7 +1745,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smax_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 @@ -1855,7 +1761,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smax_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -1875,7 +1780,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smax_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -1896,7 +1800,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umax_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_endpgm ; @@ -1908,7 +1811,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umax_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_endpgm ; @@ -1919,7 +1821,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -1942,7 +1843,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umax_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -1961,7 +1861,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umax_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -1975,7 +1874,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] @@ -2000,7 +1898,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umax_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_endpgm ; @@ -2018,7 +1915,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umax_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -2033,7 +1929,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -2059,7 +1954,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umax_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 @@ -2078,7 +1972,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -2098,7 +1991,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -2122,7 +2014,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umax_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_endpgm ; @@ -2136,7 +2027,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umax_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm ; @@ -2147,7 +2037,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -2167,7 +2056,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umax_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_mov_b32 s0, s6 ; CI-NEXT: s_mov_b32 s1, s7 @@ -2186,7 +2074,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umax_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_mov_b32 s0, s6 ; VI-NEXT: s_mov_b32 s1, s7 @@ -2202,7 +2089,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] @@ -2226,7 +2112,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umax_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_endpgm ; @@ -2242,7 +2127,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umax_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -2257,7 +2141,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -2282,7 +2165,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umax_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 @@ -2299,7 +2181,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umax_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -2319,7 +2200,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umax_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -2340,7 +2220,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smin_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_endpgm ; @@ -2352,7 +2231,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smin_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_endpgm ; @@ -2363,7 +2241,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -2386,7 +2263,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smin_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -2405,7 +2281,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smin_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -2419,7 +2294,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] @@ -2444,7 +2318,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smin_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_endpgm ; @@ -2462,7 +2335,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smin_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -2477,7 +2349,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -2503,7 +2374,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smin_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 @@ -2522,7 +2392,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -2542,7 +2411,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -2566,7 +2434,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smin_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_endpgm ; @@ -2580,7 +2447,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smin_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm ; @@ -2591,7 +2457,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -2611,7 +2476,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smin_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_mov_b32 s0, s6 ; CI-NEXT: s_mov_b32 s1, s7 @@ -2630,7 +2494,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_smin_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_mov_b32 s0, s6 ; VI-NEXT: s_mov_b32 s1, s7 @@ -2646,7 +2509,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] @@ -2670,7 +2532,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smin_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_endpgm ; @@ -2686,7 +2547,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smin_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -2701,7 +2561,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -2726,7 +2585,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_smin_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 @@ -2743,7 +2601,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_smin_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -2763,7 +2620,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_smin_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -2784,7 +2640,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umin_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_endpgm ; @@ -2796,7 +2651,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umin_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_endpgm ; @@ -2807,7 +2661,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -2830,7 +2683,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umin_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -2849,7 +2701,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umin_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -2863,7 +2714,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] @@ -2888,7 +2738,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umin_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_endpgm ; @@ -2906,7 +2755,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umin_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -2921,7 +2769,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -2947,7 +2794,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umin_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 @@ -2966,7 +2812,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -2986,7 +2831,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -3010,7 +2854,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umin_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_endpgm ; @@ -3024,7 +2867,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umin_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm ; @@ -3035,7 +2877,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -3055,7 +2896,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umin_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_mov_b32 s0, s6 ; CI-NEXT: s_mov_b32 s1, s7 @@ -3074,7 +2914,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_atomic_umin_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_mov_b32 s0, s6 ; VI-NEXT: s_mov_b32 s1, s7 @@ -3090,7 +2929,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] @@ -3114,7 +2952,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umin_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_endpgm ; @@ -3130,7 +2967,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umin_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -3145,7 +2981,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -3170,7 +3005,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_atomic_umin_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 @@ -3187,7 +3021,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_atomic_umin_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 @@ -3207,7 +3040,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_atomic_umin_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -3228,7 +3060,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3242,7 +3073,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3255,7 +3085,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3280,7 +3109,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3300,7 +3128,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3315,7 +3142,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3341,7 +3167,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_or_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3361,7 +3186,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3378,7 +3202,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3406,7 +3229,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_or_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3426,7 +3248,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3447,7 +3268,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3472,7 +3292,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3488,7 +3307,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3501,7 +3319,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3523,7 +3340,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3543,7 +3359,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3560,7 +3375,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3585,7 +3399,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_or_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3603,7 +3416,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3620,7 +3432,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3647,7 +3458,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_or_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3665,7 +3475,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3686,7 +3495,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3708,7 +3516,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3722,7 +3529,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3735,7 +3541,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3755,7 +3560,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3769,7 +3573,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3782,7 +3585,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3802,7 +3604,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3816,7 +3617,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3829,7 +3629,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3854,7 +3653,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3874,7 +3672,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3889,7 +3686,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3915,7 +3711,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -3935,7 +3730,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3952,7 +3746,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3980,7 +3773,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4000,7 +3792,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4021,7 +3812,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4046,7 +3836,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4062,7 +3851,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4075,7 +3863,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4097,7 +3884,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4117,7 +3903,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4134,7 +3919,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4159,7 +3943,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4177,7 +3960,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4194,7 +3976,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4221,7 +4002,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_swap_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4239,7 +4019,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4260,7 +4039,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4282,7 +4060,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4296,7 +4073,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4309,7 +4085,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4334,7 +4109,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4354,7 +4128,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4369,7 +4142,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4395,7 +4167,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_xor_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4415,7 +4186,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4432,7 +4202,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4460,7 +4229,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_xor_x2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4480,7 +4248,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4501,7 +4268,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4526,7 +4292,6 @@ ; CI-NEXT: s_mov_b32 s5, s1 ; CI-NEXT: v_mov_b32_e32 v0, s2 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4542,7 +4307,6 @@ ; VI-NEXT: s_mov_b32 s5, s1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4555,7 +4319,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4577,7 +4340,6 @@ ; CI-NEXT: s_mov_b32 s1, s5 ; CI-NEXT: v_mov_b32_e32 v0, s8 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4597,7 +4359,6 @@ ; VI-NEXT: s_mov_b32 s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s8 ; VI-NEXT: v_mov_b32_e32 v1, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4614,7 +4375,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4639,7 +4399,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_xor_x2 v[0:1], v[2:3], s[4:7], 0 addr64 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4657,7 +4416,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4674,7 +4432,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4701,7 +4458,6 @@ ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_xor_x2 v[0:1], v[2:3], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4719,7 +4475,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4740,7 +4495,6 @@ ; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 3 ; GFX9-NEXT: s_add_u32 s0, s0, s4 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v2, v[0:1], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4767,7 +4521,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s7 ; CI-NEXT: v_mov_b32_e32 v2, s8 ; CI-NEXT: v_mov_b32_e32 v3, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4786,7 +4539,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s8 ; VI-NEXT: v_mov_b32_e32 v3, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4802,7 +4554,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v4, v[0:3], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4828,7 +4579,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s7 ; CI-NEXT: v_mov_b32_e32 v2, s8 ; CI-NEXT: v_mov_b32_e32 v3, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[0:3], s4 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4848,7 +4598,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s8 ; VI-NEXT: v_mov_b32_e32 v3, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[0:3], s4 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4864,7 +4613,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v4, v[0:3], s[4:5] offset:2368 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4890,7 +4638,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s5 ; CI-NEXT: v_mov_b32_e32 v2, s6 ; CI-NEXT: v_mov_b32_e32 v3, s7 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[0:3], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4911,7 +4658,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: v_mov_b32_e32 v3, s7 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[0:3], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4927,7 +4673,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-NEXT: v_mov_b32_e32 v3, s7 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4956,7 +4701,6 @@ ; CI-NEXT: v_mov_b32_e32 v2, s6 ; CI-NEXT: v_mov_b32_e32 v3, s7 ; CI-NEXT: v_mov_b32_e32 v5, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[4:5], s[8:11], 0 addr64 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -4977,7 +4721,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: v_mov_b32_e32 v4, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4995,7 +4738,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 ; GFX9-NEXT: v_mov_b32_e32 v3, s7 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v4, v[0:3], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5026,7 +4768,6 @@ ; CI-NEXT: v_mov_b32_e32 v2, s12 ; CI-NEXT: v_mov_b32_e32 v3, s13 ; CI-NEXT: v_mov_b32_e32 v5, s11 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[4:5], s[4:7], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5049,7 +4790,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v4, s2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5073,7 +4813,6 @@ ; GFX9-NEXT: s_addc_u32 s3, s5, s3 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[2:3] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5102,7 +4841,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s7 ; CI-NEXT: v_mov_b32_e32 v2, s8 ; CI-NEXT: v_mov_b32_e32 v3, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5121,7 +4859,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s8 ; VI-NEXT: v_mov_b32_e32 v3, s9 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5137,7 +4874,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v4, v[0:3], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5160,7 +4896,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s5 ; CI-NEXT: v_mov_b32_e32 v2, s6 ; CI-NEXT: v_mov_b32_e32 v3, s7 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[8:11], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5181,7 +4916,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: v_mov_b32_e32 v3, s7 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], off, s[8:11], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5199,7 +4933,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-NEXT: v_mov_b32_e32 v3, s7 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5227,7 +4960,6 @@ ; CI-NEXT: v_mov_b32_e32 v2, s6 ; CI-NEXT: v_mov_b32_e32 v3, s7 ; CI-NEXT: v_mov_b32_e32 v4, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[4:5], s[8:11], 0 addr64 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5246,7 +4978,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s6 ; VI-NEXT: v_mov_b32_e32 v3, s7 ; VI-NEXT: v_mov_b32_e32 v4, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5264,7 +4995,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-NEXT: s_addc_u32 s1, s1, s5 ; GFX9-NEXT: v_mov_b32_e32 v3, s7 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5294,7 +5024,6 @@ ; CI-NEXT: v_mov_b32_e32 v2, s12 ; CI-NEXT: v_mov_b32_e32 v3, s13 ; CI-NEXT: v_mov_b32_e32 v5, s11 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[4:5], s[4:7], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5315,7 +5044,6 @@ ; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v4, s2 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5339,7 +5067,6 @@ ; GFX9-NEXT: s_addc_u32 s3, s5, s3 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[2:3] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5364,7 +5091,6 @@ ; CI-NEXT: s_mov_b32 s5, s3 ; CI-NEXT: s_mov_b32 s2, s6 ; CI-NEXT: s_mov_b32 s3, s7 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5381,7 +5107,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5394,7 +5119,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5420,7 +5145,6 @@ ; CI-NEXT: s_mov_b32 s5, s3 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_mov_b32 s3, s7 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5437,7 +5161,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, -1 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5450,7 +5173,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] offset:-32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5472,7 +5195,6 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_mov_b32 s4, s0 ; CI-NEXT: s_mov_b32 s5, s1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5489,7 +5211,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5502,7 +5223,7 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5529,7 +5250,6 @@ ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: s_mov_b32 s7, s3 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5550,7 +5270,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5568,7 +5287,6 @@ ; GFX9-NEXT: s_lshl_b64 s[0:1], s[2:3], 3 ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5597,7 +5315,6 @@ ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: s_mov_b32 s7, s3 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5616,7 +5333,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, s5 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5634,7 +5350,6 @@ ; GFX9-NEXT: s_lshl_b64 s[0:1], s[2:3], 3 ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5662,7 +5377,6 @@ ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: s_mov_b32 s7, s3 ; CI-NEXT: v_mov_b32_e32 v1, s9 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5683,7 +5397,6 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5701,7 +5414,6 @@ ; GFX9-NEXT: s_lshl_b64 s[0:1], s[2:3], 3 ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5726,7 +5438,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_mov_b32 s4, s2 ; CI-NEXT: s_mov_b32 s5, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:32 ; CI-NEXT: s_endpgm ; @@ -5740,7 +5451,6 @@ ; VI-NEXT: s_addc_u32 s1, s3, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -5751,7 +5461,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -5771,7 +5480,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_mov_b32 s4, s2 ; CI-NEXT: s_mov_b32 s5, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; CI-NEXT: s_endpgm ; @@ -5783,7 +5491,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -5794,7 +5501,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; GFX9-NEXT: s_endpgm entry: @@ -5816,7 +5522,6 @@ ; CI-NEXT: v_mov_b32_e32 v2, s4 ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 ; CI-NEXT: s_endpgm ; @@ -5834,7 +5539,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -5849,7 +5553,6 @@ ; GFX9-NEXT: s_add_u32 s0, s6, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: s_addc_u32 s1, s7, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -5873,7 +5576,6 @@ ; CI-NEXT: v_mov_b32_e32 v2, s4 ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm ; @@ -5889,7 +5591,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -5904,7 +5605,6 @@ ; GFX9-NEXT: s_add_u32 s0, s6, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: s_addc_u32 s1, s7, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm entry: @@ -5927,7 +5627,6 @@ ; CI-NEXT: v_mov_b32_e32 v2, s4 ; CI-NEXT: s_mov_b64 s[0:1], s[6:7] ; CI-NEXT: v_mov_b32_e32 v3, s5 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[0:3], 0 addr64 offset:32 ; CI-NEXT: s_endpgm ; @@ -5945,7 +5644,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; @@ -5960,7 +5658,6 @@ ; GFX9-NEXT: s_add_u32 s0, s6, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: s_addc_u32 s1, s7, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm entry: @@ -5979,7 +5676,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_inc_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -5993,7 +5689,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_inc_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6006,7 +5701,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6031,7 +5725,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_inc_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -6051,7 +5744,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_inc_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6066,7 +5758,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6092,7 +5783,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_inc_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -6112,7 +5802,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6129,7 +5818,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6150,7 +5838,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_dec_x2 v[0:1], off, s[0:3], 0 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -6164,7 +5851,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_dec_x2 v[0:1], off, s[0:3], 0 offset:32 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6177,7 +5863,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6202,7 +5887,6 @@ ; CI-NEXT: v_mov_b32_e32 v1, s9 ; CI-NEXT: s_mov_b32 s6, s2 ; CI-NEXT: s_mov_b32 s7, s3 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_dec_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -6222,7 +5906,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s9 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: buffer_atomic_dec_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6237,7 +5920,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6263,7 +5945,6 @@ ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CI-NEXT: buffer_atomic_dec_x2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: buffer_wbinvl1_vol @@ -6283,7 +5964,6 @@ ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6300,7 +5980,6 @@ ; GFX9-NEXT: s_add_u32 s0, s4, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: s_addc_u32 s1, s5, s1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll --- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll @@ -15,7 +15,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[2:3], v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -49,7 +48,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -61,7 +59,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -87,7 +84,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -123,7 +119,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -137,7 +132,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -171,7 +165,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s35 ; SI-NEXT: v_mov_b32_e32 v2, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[1:2], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -190,7 +184,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -202,7 +195,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -225,7 +217,7 @@ ; SI-NEXT: v_mov_b32_e32 v2, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[1:2], off, s[4:7], 0 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -246,7 +238,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -258,7 +249,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -284,7 +274,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s35 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -303,7 +293,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -315,7 +304,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -338,7 +326,7 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -359,7 +347,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -371,7 +358,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -452,7 +438,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[2:3], v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -549,7 +534,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -561,7 +545,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -652,7 +635,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -755,7 +737,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -769,7 +750,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -877,7 +857,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s35 ; SI-NEXT: v_mov_b32_e32 v2, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[1:2], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -896,7 +876,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -908,7 +887,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1009,7 +987,7 @@ ; SI-NEXT: v_mov_b32_e32 v2, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[1:2], off, s[4:7], 0 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1030,7 +1008,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1042,7 +1019,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v2, v[0:1], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1145,7 +1121,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s35 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1164,7 +1140,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1176,7 +1151,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1280,7 +1254,7 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_swap_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1301,7 +1275,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1313,7 +1286,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_swap_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1335,7 +1307,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add_x2 v[2:3], v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1369,7 +1340,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1381,7 +1351,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[0:1], v[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1407,7 +1376,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1443,7 +1411,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_add_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1457,7 +1424,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1491,7 +1457,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s35 ; SI-NEXT: v_mov_b32_e32 v2, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_add_x2 v[1:2], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1510,7 +1476,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1522,7 +1487,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1545,7 +1509,7 @@ ; SI-NEXT: v_mov_b32_e32 v2, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_add_x2 v[1:2], off, s[4:7], 0 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1566,7 +1530,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1578,7 +1541,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v2, v[0:1], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1604,7 +1566,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s35 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1623,7 +1585,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1635,7 +1596,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1658,7 +1618,7 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_add_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1679,7 +1639,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1691,7 +1650,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_add_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1713,7 +1671,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub_x2 v[2:3], v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1747,7 +1704,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1759,7 +1715,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1785,7 +1740,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1821,7 +1775,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_sub_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1835,7 +1788,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1869,7 +1821,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s35 ; SI-NEXT: v_mov_b32_e32 v2, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_sub_x2 v[1:2], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1888,7 +1840,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1900,7 +1851,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1923,7 +1873,7 @@ ; SI-NEXT: v_mov_b32_e32 v2, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_sub_x2 v[1:2], off, s[4:7], 0 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -1944,7 +1894,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -1956,7 +1905,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v2, v[0:1], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -1982,7 +1930,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s35 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2001,7 +1949,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2013,7 +1960,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2036,7 +1982,7 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_sub_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2057,7 +2003,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2069,7 +2014,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_sub_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2091,7 +2035,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and_x2 v[2:3], v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2125,7 +2068,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2137,7 +2079,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[0:1], v[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2163,7 +2104,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2199,7 +2139,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_and_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2213,7 +2152,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2247,7 +2185,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s35 ; SI-NEXT: v_mov_b32_e32 v2, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_and_x2 v[1:2], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2266,7 +2204,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2278,7 +2215,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2301,7 +2237,7 @@ ; SI-NEXT: v_mov_b32_e32 v2, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_and_x2 v[1:2], off, s[4:7], 0 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2322,7 +2258,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2334,7 +2269,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v2, v[0:1], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2360,7 +2294,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s35 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2379,7 +2313,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2391,7 +2324,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2414,7 +2346,7 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_and_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2435,7 +2367,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2447,7 +2378,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_and_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2483,7 +2413,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2510,7 +2439,6 @@ ; VI-NEXT: v_and_b32_e32 v8, v6, v2 ; VI-NEXT: v_not_b32_e32 v5, v4 ; VI-NEXT: v_not_b32_e32 v4, v8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2536,7 +2464,6 @@ ; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 ; GFX9-NEXT: v_not_b32_e32 v5, v4 ; GFX9-NEXT: v_not_b32_e32 v4, v8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2575,7 +2502,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2604,7 +2530,6 @@ ; VI-NEXT: v_and_b32_e32 v8, v6, v2 ; VI-NEXT: v_not_b32_e32 v5, v4 ; VI-NEXT: v_not_b32_e32 v4, v8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2630,7 +2555,6 @@ ; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 ; GFX9-NEXT: v_not_b32_e32 v5, v4 ; GFX9-NEXT: v_not_b32_e32 v4, v8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2676,7 +2600,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2703,7 +2626,6 @@ ; VI-NEXT: v_and_b32_e32 v8, v6, v2 ; VI-NEXT: v_not_b32_e32 v5, v4 ; VI-NEXT: v_not_b32_e32 v4, v8 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2731,7 +2653,6 @@ ; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 ; GFX9-NEXT: v_not_b32_e32 v5, v4 ; GFX9-NEXT: v_not_b32_e32 v4, v8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2776,7 +2697,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2805,7 +2725,6 @@ ; VI-NEXT: v_and_b32_e32 v1, v8, v2 ; VI-NEXT: v_not_b32_e32 v7, v0 ; VI-NEXT: v_not_b32_e32 v6, v1 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2831,7 +2750,6 @@ ; GFX9-NEXT: v_and_b32_e32 v8, v6, v2 ; GFX9-NEXT: v_not_b32_e32 v5, v4 ; GFX9-NEXT: v_not_b32_e32 v4, v8 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2877,7 +2795,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -2913,7 +2830,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_not_b32_e32 v1, v0 ; VI-NEXT: v_not_b32_e32 v0, v6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -2940,7 +2856,6 @@ ; GFX9-NEXT: v_and_b32_e32 v5, s6, v2 ; GFX9-NEXT: v_not_b32_e32 v1, v0 ; GFX9-NEXT: v_not_b32_e32 v0, v5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -2985,7 +2900,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3023,7 +2937,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_not_b32_e32 v1, v0 ; VI-NEXT: v_not_b32_e32 v0, v6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3050,7 +2963,6 @@ ; GFX9-NEXT: v_and_b32_e32 v5, s6, v2 ; GFX9-NEXT: v_not_b32_e32 v1, v0 ; GFX9-NEXT: v_not_b32_e32 v0, v5 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3098,7 +3010,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3136,7 +3047,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_not_b32_e32 v1, v0 ; VI-NEXT: v_not_b32_e32 v0, v6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3163,7 +3073,6 @@ ; GFX9-NEXT: v_and_b32_e32 v1, s6, v5 ; GFX9-NEXT: v_not_b32_e32 v4, v0 ; GFX9-NEXT: v_not_b32_e32 v3, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3208,7 +3117,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3248,7 +3156,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_not_b32_e32 v1, v0 ; VI-NEXT: v_not_b32_e32 v0, v6 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3275,7 +3182,6 @@ ; GFX9-NEXT: v_and_b32_e32 v1, s6, v5 ; GFX9-NEXT: v_not_b32_e32 v4, v0 ; GFX9-NEXT: v_not_b32_e32 v3, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3303,7 +3209,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or_x2 v[2:3], v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3337,7 +3242,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3349,7 +3253,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[0:1], v[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3375,7 +3278,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3411,7 +3313,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_or_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3425,7 +3326,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3459,7 +3359,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s35 ; SI-NEXT: v_mov_b32_e32 v2, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_or_x2 v[1:2], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3478,7 +3378,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3490,7 +3389,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3513,7 +3411,7 @@ ; SI-NEXT: v_mov_b32_e32 v2, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_or_x2 v[1:2], off, s[4:7], 0 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3534,7 +3432,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3546,7 +3443,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v2, v[0:1], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3572,7 +3468,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s35 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3591,7 +3487,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3603,7 +3498,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3626,7 +3520,7 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_or_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3647,7 +3541,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3659,7 +3552,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_or_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3681,7 +3573,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor_x2 v[2:3], v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3715,7 +3606,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3727,7 +3617,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3753,7 +3642,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3789,7 +3677,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_xor_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3803,7 +3690,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3837,7 +3723,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s35 ; SI-NEXT: v_mov_b32_e32 v2, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_xor_x2 v[1:2], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3856,7 +3742,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3868,7 +3753,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3891,7 +3775,7 @@ ; SI-NEXT: v_mov_b32_e32 v2, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_xor_x2 v[1:2], off, s[4:7], 0 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3912,7 +3796,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3924,7 +3807,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v2, v[0:1], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -3950,7 +3832,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s35 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -3969,7 +3851,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -3981,7 +3862,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4004,7 +3884,7 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_xor_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4025,7 +3905,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4037,7 +3916,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_xor_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4072,7 +3950,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4098,7 +3975,6 @@ ; VI-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4123,7 +3999,6 @@ ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4161,7 +4036,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4189,7 +4063,6 @@ ; VI-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4214,7 +4087,6 @@ ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4259,7 +4131,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4285,7 +4156,6 @@ ; VI-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4312,7 +4182,6 @@ ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4356,7 +4225,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4384,7 +4252,6 @@ ; VI-NEXT: v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; VI-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4409,7 +4276,6 @@ ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4456,7 +4322,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4493,7 +4358,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4521,7 +4385,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4567,7 +4430,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4606,7 +4468,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4634,7 +4495,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4683,7 +4543,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4722,7 +4581,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4750,7 +4608,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4796,7 +4653,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4837,7 +4693,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4865,7 +4720,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -4909,7 +4763,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -4946,7 +4799,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, s0 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -4980,7 +4832,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5026,7 +4877,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[8:11], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5070,7 +4920,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s7 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5107,7 +4956,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5156,7 +5004,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5191,7 +5038,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, s0 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5225,7 +5071,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5270,7 +5115,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[8:11], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5312,7 +5156,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, s0 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5349,7 +5192,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5394,7 +5236,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5420,7 +5261,6 @@ ; VI-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5445,7 +5285,6 @@ ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5483,7 +5322,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5511,7 +5349,6 @@ ; VI-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5536,7 +5373,6 @@ ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5581,7 +5417,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5607,7 +5442,6 @@ ; VI-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5634,7 +5468,6 @@ ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5678,7 +5511,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5706,7 +5538,6 @@ ; VI-NEXT: v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; VI-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5731,7 +5562,6 @@ ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5778,7 +5608,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5815,7 +5644,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5843,7 +5671,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -5889,7 +5716,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -5928,7 +5754,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -5956,7 +5781,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6005,7 +5829,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6044,7 +5867,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6072,7 +5894,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6118,7 +5939,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6159,7 +5979,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6187,7 +6006,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6231,7 +6049,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6268,7 +6085,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, s0 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6302,7 +6118,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6348,7 +6163,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[8:11], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6392,7 +6206,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s7 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6429,7 +6242,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6477,7 +6289,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[8:11], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6519,7 +6330,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, s0 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6556,7 +6366,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6601,7 +6410,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6627,7 +6435,6 @@ ; VI-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6652,7 +6459,6 @@ ; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6690,7 +6496,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6718,7 +6523,6 @@ ; VI-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6743,7 +6547,6 @@ ; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6788,7 +6591,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6814,7 +6616,6 @@ ; VI-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6841,7 +6642,6 @@ ; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6885,7 +6685,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -6913,7 +6712,6 @@ ; VI-NEXT: v_cmp_le_u64_e32 vcc, v[8:9], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; VI-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -6938,7 +6736,6 @@ ; GFX9-NEXT: v_cmp_le_u64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -6985,7 +6782,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7022,7 +6818,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7050,7 +6845,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7096,7 +6890,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7135,7 +6928,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7163,7 +6955,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7212,7 +7003,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7251,7 +7041,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7279,7 +7068,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7325,7 +7113,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7366,7 +7153,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7394,7 +7180,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7435,7 +7220,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7461,7 +7245,6 @@ ; VI-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7486,7 +7269,6 @@ ; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7524,7 +7306,6 @@ ; SI-NEXT: v_mov_b32_e32 v10, v6 ; SI-NEXT: v_mov_b32_e32 v9, v5 ; SI-NEXT: v_mov_b32_e32 v8, v4 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7552,7 +7333,6 @@ ; VI-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7577,7 +7357,6 @@ ; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7622,7 +7401,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7648,7 +7426,6 @@ ; VI-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; VI-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7675,7 +7452,6 @@ ; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7719,7 +7495,6 @@ ; SI-NEXT: v_mov_b32_e32 v1, v9 ; SI-NEXT: v_mov_b32_e32 v2, v10 ; SI-NEXT: v_mov_b32_e32 v3, v11 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v[6:7], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7747,7 +7522,6 @@ ; VI-NEXT: v_cmp_le_i64_e32 vcc, v[8:9], v[2:3] ; VI-NEXT: v_cndmask_b32_e32 v7, v3, v9, vcc ; VI-NEXT: v_cndmask_b32_e32 v6, v2, v8, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7772,7 +7546,6 @@ ; GFX9-NEXT: v_cmp_le_i64_e32 vcc, v[6:7], v[2:3] ; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v7, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7819,7 +7592,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7856,7 +7628,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7884,7 +7655,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -7930,7 +7700,6 @@ ; SI-NEXT: v_mov_b32_e32 v7, v3 ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[5:8], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -7969,7 +7738,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -7997,7 +7765,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8046,7 +7813,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8085,7 +7851,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8113,7 +7878,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8159,7 +7923,6 @@ ; SI-NEXT: v_mov_b32_e32 v4, v6 ; SI-NEXT: v_mov_b32_e32 v5, v7 ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[3:6], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8200,7 +7963,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s35 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8228,7 +7990,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8272,7 +8033,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8309,7 +8069,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, s0 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8343,7 +8102,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8389,7 +8147,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[8:11], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8433,7 +8190,6 @@ ; VI-NEXT: v_mov_b32_e32 v5, s7 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8470,7 +8226,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[0:1] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8517,7 +8272,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[0:3], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8548,7 +8302,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, s0 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8578,7 +8331,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, s2 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8622,7 +8374,6 @@ ; SI-NEXT: v_mov_b32_e32 v6, v2 ; SI-NEXT: v_mov_b32_e32 v5, v1 ; SI-NEXT: v_mov_b32_e32 v4, v0 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_cmpswap_x2 v[4:7], off, s[8:11], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8664,7 +8415,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, s0 ; VI-NEXT: v_cndmask_b32_e32 v1, v0, v3, vcc ; VI-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8701,7 +8451,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v5, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_cmpswap_x2 v[0:1], v2, v[3:6], s[0:1] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8733,7 +8482,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc_x2 v[2:3], v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8767,7 +8515,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8779,7 +8526,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8805,7 +8551,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8841,7 +8586,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_inc_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8855,7 +8599,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8889,7 +8632,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s35 ; SI-NEXT: v_mov_b32_e32 v2, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_inc_x2 v[1:2], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8908,7 +8651,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8920,7 +8662,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -8943,7 +8684,7 @@ ; SI-NEXT: v_mov_b32_e32 v2, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_inc_x2 v[1:2], off, s[4:7], 0 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -8964,7 +8705,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -8976,7 +8716,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -9002,7 +8741,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s35 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_inc_x2 v[0:1], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9021,7 +8760,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -9033,7 +8771,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -9056,7 +8793,7 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_inc_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9077,7 +8814,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -9089,7 +8825,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -9111,7 +8846,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec_x2 v[2:3], v[0:1], s[4:7], 0 addr64 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9145,7 +8879,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9157,7 +8890,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -9183,7 +8915,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9219,7 +8950,6 @@ ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s4, s6 ; SI-NEXT: s_mov_b32 s5, s6 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SI-NEXT: buffer_atomic_dec_x2 v[2:3], v[0:1], s[4:7], 0 addr64 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9233,7 +8963,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -9267,7 +8996,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v1, s35 ; SI-NEXT: v_mov_b32_e32 v2, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_dec_x2 v[1:2], off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9286,7 +9015,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -9298,7 +9026,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v2, v[0:1], s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -9321,7 +9048,7 @@ ; SI-NEXT: v_mov_b32_e32 v2, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_dec_x2 v[1:2], off, s[4:7], 0 offset:32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9342,7 +9069,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -9354,7 +9080,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v2, v[0:1], s[4:5] offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -9380,7 +9105,7 @@ ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s35 ; SI-NEXT: v_mov_b32_e32 v1, s34 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_dec_x2 v[0:1], off, s[4:7], 0 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9399,7 +9124,6 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v2, s4 ; VI-NEXT: v_mov_b32_e32 v3, s5 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -9411,7 +9135,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[4:5] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol @@ -9434,7 +9157,7 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_atomic_dec_x2 v[0:1], off, s[4:7], 0 offset:32 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_wbinvl1 @@ -9455,7 +9178,6 @@ ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: v_mov_b32_e32 v3, s35 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_wbinvl1_vol @@ -9467,7 +9189,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_atomic_dec_x2 v[0:1], v2, v[0:1], s[4:5] offset:32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/idemponent-atomics.ll b/llvm/test/CodeGen/AMDGPU/idemponent-atomics.ll --- a/llvm/test/CodeGen/AMDGPU/idemponent-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/idemponent-atomics.ll @@ -43,7 +43,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] @@ -63,7 +62,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -84,7 +82,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll @@ -4,8 +4,8 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s -; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX9 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX9 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s @@ -186,7 +186,8 @@ ; GCN-LABEL: {{^}}gws_barrier_fence_before: ; NOLOOP: s_mov_b32 m0, 0{{$}} ; NOLOOP: store_{{dword|b32}} -; NOLOOP: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9: s_waitcnt vmcnt(0) +; GFX10: s_waitcnt_vscnt null, 0x0 ; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) define amdgpu_kernel void @gws_barrier_fence_before(i32 %val, ptr addrspace(1) %ptr) #0 { @@ -201,7 +202,6 @@ ; NOLOOP: s_mov_b32 m0, 0{{$}} ; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; NOLOOP-NEXT: load_{{dword|b32}} define amdgpu_kernel void @gws_barrier_fence_after(i32 %val, ptr addrspace(1) %ptr) #0 { @@ -229,8 +229,6 @@ ; NOLOOP: s_mov_b32 m0, 0 ; NOLOOP: ds_gws_init v0 offset:7 gds ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 - ; NOLOOP-NEXT: ds_gws_barrier v0 offset:7 gds ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) define amdgpu_kernel void @gws_init_fence_barrier(i32 %val) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll --- a/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll @@ -10,7 +10,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, 4.0 ; VI-NEXT: s_mov_b32 m0, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_add_rtn_f32 v0, v0, v1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] @@ -19,7 +18,6 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -35,7 +33,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v2, v1 ; GFX7-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 @@ -58,7 +55,6 @@ ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, v1 ; GFX8-NEXT: v_add_f32_e32 v1, 4.0, v2 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 @@ -79,7 +75,6 @@ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, 4.0 ; VI-NEXT: s_mov_b32 m0, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_add_f32 v0, v1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] @@ -88,7 +83,6 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_f32 v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -103,7 +97,6 @@ ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_add_f32_e32 v2, 4.0, v1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 @@ -125,7 +118,6 @@ ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_add_f32_e32 v2, 4.0, v1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 @@ -149,16 +141,13 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_lshl_b32 s4, s3, 3 ; VI-NEXT: v_mov_b32_e32 v1, s4 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_add_rtn_f32 v1, v1, v0 offset:32 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_lshl_b32 s3, s3, 4 ; VI-NEXT: v_mov_b32_e32 v2, s3 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_add_f32 v2, v0 offset:64 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_add_rtn_f32 v2, v0, v1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -173,16 +162,13 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s4, s3, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_f32 v1, v1, v0 offset:32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s3, s3, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_f32 v2, v0 offset:64 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -207,7 +193,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v0, s6 ; GFX7-NEXT: v_add_f32_e32 v2, 0x42280000, v1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 @@ -225,7 +210,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_add_f32_e32 v2, 0x42280000, v1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v2, v3, v1, v2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 @@ -244,7 +228,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, v1 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_add_f32_e32 v3, v2, v0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v1, v2, v3 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 @@ -277,7 +260,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s6 ; GFX8-NEXT: v_add_f32_e32 v2, 0x42280000, v1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 @@ -295,7 +277,6 @@ ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_add_f32_e32 v2, 0x42280000, v1 ; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b32 v2, v3, v1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 @@ -314,7 +295,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v2, v1 ; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: v_add_f32_e32 v3, v2, v0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b32 v1, v1, v2, v3 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 @@ -542,7 +522,6 @@ ; VI-NEXT: v_mov_b32_e32 v4, v1 ; VI-NEXT: v_mov_b32_e32 v3, v0 ; VI-NEXT: v_add_f64 v[0:1], v[3:4], 4.0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_cmpst_rtn_b64 v[0:1], v2, v[3:4], v[0:1] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[3:4] @@ -565,7 +544,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-NEXT: v_add_f64 v[0:1], v[3:4], 4.0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_cmpst_rtn_b64 v[0:1], v2, v[3:4], v[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[3:4] @@ -589,7 +567,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v4, v1 ; GFX7-NEXT: v_mov_b32_e32 v3, v0 ; GFX7-NEXT: v_add_f64 v[0:1], v[3:4], 4.0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b64 v[0:1], v2, v[3:4], v[0:1] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[3:4] @@ -613,7 +590,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v4, v1 ; GFX8-NEXT: v_mov_b32_e32 v3, v0 ; GFX8-NEXT: v_add_f64 v[0:1], v[3:4], 4.0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b64 v[0:1], v2, v[3:4], v[0:1] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[3:4] @@ -638,7 +614,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_add_f64 v[3:4], v[1:2], 4.0 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_cmpst_rtn_b64 v[3:4], v0, v[1:2], v[3:4] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[1:2] @@ -660,7 +635,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[3:4], v[1:2], 4.0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_cmpst_rtn_b64 v[3:4], v0, v[1:2], v[3:4] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[1:2] @@ -683,7 +657,6 @@ ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_add_f64 v[3:4], v[1:2], 4.0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b64 v[3:4], v0, v[1:2], v[3:4] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[1:2] @@ -706,7 +679,6 @@ ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_add_f64 v[3:4], v[1:2], 4.0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b64 v[3:4], v0, v[1:2], v[3:4] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[1:2] @@ -734,7 +706,6 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, v2 ; VI-NEXT: v_sub_f32_e32 v2, v3, v1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 @@ -756,7 +727,6 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, v2 ; GFX9-NEXT: v_sub_f32_e32 v2, v3, v1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 @@ -779,7 +749,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v3, v2 ; GFX7-NEXT: v_sub_f32_e32 v2, v3, v1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 @@ -802,7 +771,6 @@ ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v3, v2 ; GFX8-NEXT: v_sub_f32_e32 v2, v3, v1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 @@ -828,7 +796,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_sub_f32_e32 v3, v2, v1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 @@ -849,7 +816,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_sub_f32_e32 v3, v2, v1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 @@ -871,7 +837,6 @@ ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_sub_f32_e32 v3, v2, v1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 @@ -893,7 +858,6 @@ ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_sub_f32_e32 v3, v2, v1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 @@ -921,7 +885,6 @@ ; VI-NEXT: v_mov_b32_e32 v6, v4 ; VI-NEXT: v_mov_b32_e32 v5, v3 ; VI-NEXT: v_add_f64 v[3:4], v[5:6], -v[1:2] -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_cmpst_rtn_b64 v[3:4], v0, v[5:6], v[3:4] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] @@ -945,7 +908,6 @@ ; GFX9-NEXT: v_mov_b32_e32 v6, v4 ; GFX9-NEXT: v_mov_b32_e32 v5, v3 ; GFX9-NEXT: v_add_f64 v[3:4], v[5:6], -v[1:2] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_cmpst_rtn_b64 v[3:4], v0, v[5:6], v[3:4] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] @@ -970,7 +932,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v6, v4 ; GFX7-NEXT: v_mov_b32_e32 v5, v3 ; GFX7-NEXT: v_add_f64 v[3:4], v[5:6], -v[1:2] -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b64 v[3:4], v0, v[5:6], v[3:4] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] @@ -995,7 +956,6 @@ ; GFX8-NEXT: v_mov_b32_e32 v6, v4 ; GFX8-NEXT: v_mov_b32_e32 v5, v3 ; GFX8-NEXT: v_add_f64 v[3:4], v[5:6], -v[1:2] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b64 v[3:4], v0, v[5:6], v[3:4] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6] @@ -1022,7 +982,6 @@ ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_add_f64 v[5:6], v[3:4], -v[1:2] -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_cmpst_rtn_b64 v[5:6], v0, v[3:4], v[5:6] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_cmp_eq_u64_e32 vcc, v[5:6], v[3:4] @@ -1044,7 +1003,6 @@ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[5:6], v[3:4], -v[1:2] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_cmpst_rtn_b64 v[5:6], v0, v[3:4], v[5:6] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[5:6], v[3:4] @@ -1067,7 +1025,6 @@ ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_add_f64 v[5:6], v[3:4], -v[1:2] -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b64 v[5:6], v0, v[3:4], v[5:6] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, v[5:6], v[3:4] @@ -1090,7 +1047,6 @@ ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_add_f64 v[5:6], v[3:4], -v[1:2] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: ds_cmpst_rtn_b64 v[5:6], v0, v[3:4], v[5:6] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[5:6], v[3:4] diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir @@ -43,7 +43,7 @@ # CHECK-LABEL: bb.1.atomic: # CHECK: BUFFER_ATOMIC_SMAX_ADDR64 -# CHECK-NEXT: S_WAITCNT 3952 +# CHECK-NEXT: S_WAITCNT_soft 3952 # CHECK-NEXT: BUFFER_WBINVL1_VOL name: atomic_max_i32_noret diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll @@ -798,63 +798,50 @@ define amdgpu_kernel void @workgroup_acquire_fence() { ; GFX6-LABEL: workgroup_acquire_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: workgroup_acquire_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: workgroup_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: workgroup_acquire_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_acquire_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: workgroup_acquire_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: workgroup_acquire_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: workgroup_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("workgroup") acquire @@ -864,59 +851,46 @@ define amdgpu_kernel void @workgroup_release_fence() { ; GFX6-LABEL: workgroup_release_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: workgroup_release_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: workgroup_release_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: workgroup_release_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_release_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: workgroup_release_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: workgroup_release_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: workgroup_release_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: workgroup_release_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("workgroup") release @@ -926,63 +900,50 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; GFX6-LABEL: workgroup_acq_rel_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: workgroup_acq_rel_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: workgroup_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: workgroup_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: workgroup_acq_rel_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: workgroup_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("workgroup") acq_rel @@ -992,63 +953,50 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; GFX6-LABEL: workgroup_seq_cst_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: workgroup_seq_cst_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: workgroup_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: workgroup_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: workgroup_seq_cst_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: workgroup_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("workgroup") seq_cst @@ -1066,8 +1014,6 @@ ; ; GFX10-WGP-LABEL: workgroup_one_as_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -1085,7 +1031,6 @@ ; ; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1095,14 +1040,11 @@ ; ; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; @@ -1125,8 +1067,6 @@ ; ; GFX10-WGP-LABEL: workgroup_one_as_release_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: workgroup_one_as_release_fence: @@ -1143,7 +1083,6 @@ ; ; GFX90A-TGSPLIT-LABEL: workgroup_one_as_release_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: @@ -1152,13 +1091,10 @@ ; ; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: workgroup_one_as_release_fence: @@ -1180,8 +1116,6 @@ ; ; GFX10-WGP-LABEL: workgroup_one_as_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -1199,7 +1133,6 @@ ; ; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1209,14 +1142,11 @@ ; ; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; @@ -1239,8 +1169,6 @@ ; ; GFX10-WGP-LABEL: workgroup_one_as_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -1258,7 +1186,6 @@ ; ; GFX90A-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1268,14 +1195,11 @@ ; ; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_endpgm ; @@ -1290,73 +1214,58 @@ define amdgpu_kernel void @agent_acquire_fence() { ; GFX6-LABEL: agent_acquire_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: agent_acquire_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: agent_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_acquire_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_acquire_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: agent_acquire_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: agent_acquire_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: agent_acquire_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -1368,63 +1277,48 @@ define amdgpu_kernel void @agent_release_fence() { ; GFX6-LABEL: agent_release_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: agent_release_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: agent_release_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_release_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_release_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: agent_release_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: agent_release_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: agent_release_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: agent_release_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_release_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("agent") release @@ -1434,75 +1328,60 @@ define amdgpu_kernel void @agent_acq_rel_fence() { ; GFX6-LABEL: agent_acq_rel_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: agent_acq_rel_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: agent_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: agent_acq_rel_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: agent_acq_rel_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -1514,75 +1393,60 @@ define amdgpu_kernel void @agent_seq_cst_fence() { ; GFX6-LABEL: agent_seq_cst_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: agent_seq_cst_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: agent_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: agent_seq_cst_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: agent_seq_cst_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -1594,73 +1458,58 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() { ; GFX6-LABEL: agent_one_as_acquire_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: agent_one_as_acquire_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: agent_one_as_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_one_as_acquire_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_one_as_acquire_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: agent_one_as_acquire_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_one_as_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -1672,63 +1521,48 @@ define amdgpu_kernel void @agent_one_as_release_fence() { ; GFX6-LABEL: agent_one_as_release_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: agent_one_as_release_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: agent_one_as_release_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_one_as_release_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_one_as_release_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_release_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: agent_one_as_release_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_one_as_release_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("agent-one-as") release @@ -1738,75 +1572,60 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() { ; GFX6-LABEL: agent_one_as_acq_rel_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: agent_one_as_acq_rel_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: agent_one_as_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_one_as_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_one_as_acq_rel_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_one_as_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -1818,75 +1637,60 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() { ; GFX6-LABEL: agent_one_as_seq_cst_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: agent_one_as_seq_cst_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: agent_one_as_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: agent_one_as_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: agent_one_as_seq_cst_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: agent_one_as_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -1898,75 +1702,60 @@ define amdgpu_kernel void @system_acquire_fence() { ; GFX6-LABEL: system_acquire_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: system_acquire_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: system_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_acquire_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_acquire_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_acquire_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: system_acquire_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -1978,65 +1767,50 @@ define amdgpu_kernel void @system_release_fence() { ; GFX6-LABEL: system_release_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: system_release_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: system_release_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_release_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_release_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: system_release_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_release_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: system_release_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: system_release_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_release_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm entry: fence release @@ -2046,41 +1820,33 @@ define amdgpu_kernel void @system_acq_rel_fence() { ; GFX6-LABEL: system_acq_rel_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: system_acq_rel_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: system_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_acq_rel_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2088,7 +1854,6 @@ ; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2096,29 +1861,23 @@ ; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: system_acq_rel_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -2130,41 +1889,33 @@ define amdgpu_kernel void @system_seq_cst_fence() { ; GFX6-LABEL: system_seq_cst_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: system_seq_cst_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: system_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_seq_cst_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2172,7 +1923,6 @@ ; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2180,29 +1930,23 @@ ; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: system_seq_cst_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -2214,75 +1958,60 @@ define amdgpu_kernel void @system_one_as_acquire_fence() { ; GFX6-LABEL: system_one_as_acquire_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: system_one_as_acquire_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: system_one_as_acquire_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_one_as_acquire_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_one_as_acquire_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_one_as_acquire_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -2294,65 +2023,50 @@ define amdgpu_kernel void @system_one_as_release_fence() { ; GFX6-LABEL: system_one_as_release_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: system_one_as_release_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: system_one_as_release_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_one_as_release_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_one_as_release_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: system_one_as_release_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_one_as_release_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; ; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: system_one_as_release_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_one_as_release_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: s_endpgm entry: fence syncscope("one-as") release @@ -2362,41 +2076,33 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() { ; GFX6-LABEL: system_one_as_acq_rel_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: system_one_as_acq_rel_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: system_one_as_acq_rel_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_one_as_acq_rel_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_one_as_acq_rel_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2404,7 +2110,6 @@ ; GFX90A-TGSPLIT-LABEL: system_one_as_acq_rel_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2412,29 +2117,23 @@ ; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_one_as_acq_rel_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm @@ -2446,41 +2145,33 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() { ; GFX6-LABEL: system_one_as_seq_cst_fence: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: system_one_as_seq_cst_fence: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol ; GFX7-NEXT: s_endpgm ; ; GFX10-WGP-LABEL: system_one_as_seq_cst_fence: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: buffer_gl1_inv ; GFX10-WGP-NEXT: s_endpgm ; ; GFX10-CU-LABEL: system_one_as_seq_cst_fence: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv ; GFX10-CU-NEXT: s_endpgm ; ; SKIP-CACHE-INV-LABEL: system_one_as_seq_cst_fence: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm ; ; GFX90A-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2488,7 +2179,6 @@ ; GFX90A-TGSPLIT-LABEL: system_one_as_seq_cst_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2496,29 +2186,23 @@ ; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence: ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: system_one_as_seq_cst_fence: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -429,7 +429,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -444,8 +443,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -461,8 +458,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -478,7 +473,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_load_dword v2, v[0:1] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 @@ -492,7 +486,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -507,7 +500,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -522,7 +514,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -537,7 +528,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -551,8 +541,6 @@ ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -566,8 +554,6 @@ ; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -819,7 +805,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -832,8 +817,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -846,8 +829,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -859,7 +840,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -870,7 +850,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -881,7 +860,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -893,7 +871,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -905,7 +882,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -917,8 +893,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -930,8 +904,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -949,7 +921,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -962,8 +933,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -976,8 +945,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -989,7 +956,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1000,7 +966,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1011,7 +976,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1023,7 +987,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1035,7 +998,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1047,8 +1009,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1060,8 +1020,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -1334,7 +1292,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1347,8 +1304,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1361,8 +1316,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1374,7 +1327,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1385,7 +1337,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1396,7 +1347,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1408,7 +1358,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1420,7 +1369,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1432,8 +1380,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1445,8 +1391,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -1464,7 +1408,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1479,8 +1422,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1497,8 +1438,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1514,7 +1453,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1526,7 +1464,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1539,7 +1476,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1553,7 +1489,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -1567,7 +1502,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -1581,8 +1515,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1598,8 +1530,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1621,7 +1551,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1636,8 +1565,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1654,8 +1581,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1671,7 +1596,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1683,7 +1607,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1696,7 +1619,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1710,7 +1632,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -1724,7 +1645,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -1738,8 +1658,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1755,8 +1673,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1926,7 +1842,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1942,8 +1857,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1960,8 +1873,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -1977,7 +1888,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -1990,7 +1900,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2004,7 +1913,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2019,7 +1927,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -2034,7 +1941,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -2049,8 +1955,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2066,8 +1970,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2090,7 +1992,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2106,8 +2007,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2124,8 +2023,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -2141,7 +2038,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -2154,7 +2050,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2168,7 +2063,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2183,7 +2077,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -2198,7 +2091,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -2213,8 +2105,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2230,8 +2120,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2509,7 +2397,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -2523,8 +2410,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -2538,8 +2423,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -2553,7 +2436,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2563,7 +2445,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2573,7 +2454,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -2584,7 +2464,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2595,7 +2474,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -2605,8 +2483,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_endpgm ; @@ -2616,8 +2492,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -2638,7 +2512,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2654,8 +2527,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2673,8 +2544,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2692,7 +2561,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2703,7 +2571,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2715,7 +2582,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2728,7 +2594,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -2741,7 +2606,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -2753,8 +2617,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2768,8 +2630,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2794,7 +2654,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2810,8 +2669,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2829,8 +2686,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2848,7 +2703,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2859,7 +2713,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2871,7 +2724,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2884,7 +2736,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -2897,7 +2748,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -2909,8 +2759,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2924,8 +2772,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3230,7 +3076,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3246,8 +3091,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3265,8 +3108,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3284,7 +3125,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3295,7 +3135,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3307,7 +3146,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3320,7 +3158,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -3333,7 +3170,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -3345,8 +3181,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3360,8 +3194,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3386,7 +3218,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3402,8 +3233,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3421,8 +3250,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3440,7 +3267,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3451,7 +3277,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3463,7 +3288,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3476,7 +3300,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -3489,7 +3312,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -3501,8 +3323,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3516,8 +3336,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3542,7 +3360,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3558,8 +3375,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3577,8 +3392,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3596,7 +3409,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3607,7 +3419,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3619,7 +3430,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3632,7 +3442,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -3645,7 +3454,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -3657,8 +3465,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3672,8 +3478,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3698,7 +3502,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3714,8 +3517,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3733,8 +3534,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3752,7 +3551,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3763,7 +3561,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3775,7 +3572,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3788,7 +3584,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -3801,7 +3596,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -3813,8 +3607,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3828,8 +3620,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3854,7 +3644,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3870,8 +3659,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3889,8 +3676,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3908,7 +3693,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3919,7 +3703,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3931,7 +3714,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3944,7 +3726,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -3957,7 +3738,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -3969,8 +3749,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3984,8 +3762,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4010,7 +3786,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4026,8 +3801,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4045,8 +3818,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4064,7 +3835,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4075,7 +3845,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4087,7 +3856,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4100,7 +3868,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -4113,7 +3880,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -4125,8 +3891,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4140,8 +3904,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4166,7 +3928,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4182,8 +3943,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4201,8 +3960,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4220,7 +3977,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4231,7 +3987,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4243,7 +3998,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4256,7 +4010,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -4269,7 +4022,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -4281,8 +4033,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4296,8 +4046,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4322,7 +4070,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4338,8 +4085,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4357,8 +4102,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4376,7 +4119,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4387,7 +4129,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4399,7 +4140,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4412,7 +4152,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -4425,7 +4164,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -4437,8 +4175,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4452,8 +4188,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4777,7 +4511,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -4795,8 +4528,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 @@ -4814,8 +4545,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -4833,7 +4562,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4847,7 +4575,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4859,7 +4586,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4872,7 +4598,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4885,7 +4610,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4897,8 +4621,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -4910,8 +4632,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -4936,7 +4656,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4955,8 +4674,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4976,8 +4693,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -4997,7 +4712,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5011,7 +4725,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5024,7 +4737,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5038,7 +4750,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -5052,7 +4763,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -5065,8 +4775,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5080,8 +4788,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5108,7 +4814,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5127,8 +4832,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5148,9 +4851,7 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv @@ -5169,7 +4870,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5183,7 +4883,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5196,7 +4895,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5210,7 +4908,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -5224,7 +4921,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -5237,8 +4933,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5252,8 +4946,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5592,7 +5284,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5611,8 +5302,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5632,8 +5321,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5653,7 +5340,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5667,7 +5353,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5680,7 +5365,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5694,7 +5378,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -5708,7 +5391,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -5721,8 +5403,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5736,8 +5416,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5764,7 +5442,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5783,8 +5460,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5804,8 +5479,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5825,7 +5498,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5839,7 +5511,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5852,7 +5523,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5866,7 +5536,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -5880,7 +5549,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -5893,8 +5561,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5908,8 +5574,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5936,7 +5600,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5955,8 +5618,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5976,8 +5637,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5997,7 +5656,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6011,7 +5669,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6024,7 +5681,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6038,7 +5694,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6052,7 +5707,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6065,8 +5719,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6080,8 +5732,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6108,7 +5758,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6127,8 +5776,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6148,8 +5795,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6169,7 +5814,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6183,7 +5827,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6196,7 +5839,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6210,7 +5852,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6224,7 +5865,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6237,8 +5877,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6252,8 +5890,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6280,7 +5916,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6299,8 +5934,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6320,8 +5953,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6341,7 +5972,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6355,7 +5985,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6368,7 +5997,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6382,7 +6010,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6396,7 +6023,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6409,8 +6035,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6424,8 +6048,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6452,7 +6074,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6471,8 +6092,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6492,8 +6111,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6513,7 +6130,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6527,7 +6143,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6540,7 +6155,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6554,7 +6168,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6568,7 +6181,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6581,8 +6193,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6596,8 +6206,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6624,7 +6232,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6643,8 +6250,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6664,8 +6269,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6685,7 +6288,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6699,7 +6301,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6712,7 +6313,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6726,7 +6326,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6740,7 +6339,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6753,8 +6351,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6768,8 +6364,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6796,7 +6390,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6815,8 +6408,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6836,8 +6427,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6857,7 +6446,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6871,7 +6459,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6884,7 +6471,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6898,7 +6484,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6912,7 +6497,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6925,8 +6509,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6940,8 +6522,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -7384,7 +6964,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -7400,8 +6979,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7418,8 +6995,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -7436,7 +7011,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_load_dword v2, v[0:1] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 @@ -7451,7 +7025,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7467,7 +7040,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7482,7 +7054,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -7498,7 +7069,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -7512,8 +7082,6 @@ ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7528,8 +7096,6 @@ ; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -7782,7 +7348,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -7795,8 +7360,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -7809,8 +7372,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -7822,7 +7383,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -7833,7 +7393,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -7844,7 +7403,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -7856,7 +7414,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -7868,7 +7425,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -7880,8 +7436,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -7893,8 +7447,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -7912,7 +7464,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -7925,8 +7476,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -7939,8 +7488,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -7952,7 +7499,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -7963,7 +7509,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -7974,7 +7519,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -7986,7 +7530,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -7998,7 +7541,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8010,8 +7552,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -8023,8 +7563,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -8293,7 +7831,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -8306,8 +7843,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -8320,8 +7855,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -8333,7 +7866,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -8344,7 +7876,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8355,7 +7886,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -8367,7 +7897,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8379,7 +7908,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8391,8 +7919,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -8404,8 +7930,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -8423,7 +7947,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8438,8 +7961,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8455,8 +7976,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8471,7 +7990,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -8483,7 +8001,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8496,7 +8013,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8510,7 +8026,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -8524,7 +8039,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -8538,8 +8052,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8554,8 +8066,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -8576,7 +8086,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8591,8 +8100,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8608,8 +8115,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8624,7 +8129,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -8636,7 +8140,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8649,7 +8152,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8663,7 +8165,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -8677,7 +8178,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -8691,8 +8191,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8707,8 +8205,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -8884,7 +8380,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8901,8 +8396,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8920,8 +8413,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8938,7 +8429,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -8951,7 +8441,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8966,7 +8455,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8981,7 +8469,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -8997,7 +8484,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -9012,8 +8498,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9030,8 +8514,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9055,7 +8537,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9072,8 +8553,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9091,8 +8570,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9109,7 +8586,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -9122,7 +8598,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9137,7 +8612,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9152,7 +8626,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -9168,7 +8641,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -9183,8 +8655,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9201,8 +8671,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9477,7 +8945,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -9491,8 +8958,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -9506,8 +8971,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -9521,7 +8984,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -9531,7 +8993,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -9541,7 +9002,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -9552,7 +9012,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -9563,7 +9022,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -9573,8 +9031,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_endpgm ; @@ -9584,8 +9040,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -9606,7 +9060,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9622,8 +9075,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9640,8 +9091,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9658,7 +9107,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -9669,7 +9117,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9681,7 +9128,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9694,7 +9140,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -9707,7 +9152,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -9719,8 +9163,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9733,8 +9175,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9758,7 +9198,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9774,8 +9213,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9792,8 +9229,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9810,7 +9245,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -9821,7 +9255,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9833,7 +9266,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9846,7 +9278,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -9859,7 +9290,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -9871,8 +9301,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9885,8 +9313,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10182,7 +9608,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10198,8 +9623,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10216,8 +9639,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10234,7 +9655,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10245,7 +9665,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10257,7 +9676,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10270,7 +9688,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -10283,7 +9700,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -10295,8 +9711,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10309,8 +9723,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10334,7 +9746,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10350,8 +9761,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10368,8 +9777,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10386,7 +9793,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10397,7 +9803,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10409,7 +9814,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10422,7 +9826,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -10435,7 +9838,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -10447,8 +9849,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10461,8 +9861,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10486,7 +9884,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10502,8 +9899,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10520,8 +9915,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10538,7 +9931,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10549,7 +9941,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10561,7 +9952,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10574,7 +9964,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -10587,7 +9976,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -10599,8 +9987,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10613,8 +9999,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10638,7 +10022,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10654,8 +10037,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10672,8 +10053,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10690,7 +10069,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10701,7 +10079,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10713,7 +10090,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10726,7 +10102,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -10739,7 +10114,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -10751,8 +10125,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10765,8 +10137,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10790,7 +10160,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10806,8 +10175,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10824,8 +10191,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10842,7 +10207,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10853,7 +10217,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10865,7 +10228,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10878,7 +10240,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -10891,7 +10252,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -10903,8 +10263,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10917,8 +10275,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10942,7 +10298,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10958,8 +10313,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10976,8 +10329,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10994,7 +10345,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11005,7 +10355,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11017,7 +10366,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11030,7 +10378,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11043,7 +10390,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11055,8 +10401,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11069,8 +10413,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11094,7 +10436,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11110,8 +10451,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11128,8 +10467,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11146,7 +10483,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11157,7 +10493,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11169,7 +10504,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11182,7 +10516,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11195,7 +10528,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11207,8 +10539,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11221,8 +10551,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11246,7 +10574,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11262,8 +10589,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11280,8 +10605,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11298,7 +10621,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11309,7 +10631,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11321,7 +10642,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11334,7 +10654,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11347,7 +10666,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11359,8 +10677,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11373,8 +10689,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11705,7 +11019,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -11723,8 +11036,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 @@ -11742,8 +11053,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -11761,7 +11070,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -11775,7 +11083,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -11787,7 +11094,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -11800,7 +11106,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -11813,7 +11118,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -11825,8 +11129,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -11838,8 +11140,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -11864,7 +11164,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11884,8 +11183,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11906,8 +11203,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11928,7 +11223,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -11943,7 +11237,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11957,7 +11250,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11971,7 +11263,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11986,7 +11277,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11999,8 +11289,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12015,8 +11303,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12044,7 +11330,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12064,8 +11349,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12086,8 +11369,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12108,7 +11389,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -12123,7 +11403,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12137,7 +11416,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12151,7 +11429,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -12166,7 +11443,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -12179,8 +11455,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12195,8 +11469,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12552,7 +11824,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12572,8 +11843,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12594,8 +11863,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12616,7 +11883,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -12631,7 +11897,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12645,7 +11910,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12659,7 +11923,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -12674,7 +11937,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -12687,8 +11949,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12703,8 +11963,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12732,7 +11990,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12752,8 +12009,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12774,8 +12029,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12796,7 +12049,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -12811,7 +12063,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12825,7 +12076,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12839,7 +12089,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -12854,7 +12103,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -12867,8 +12115,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12883,8 +12129,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12912,7 +12156,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12932,8 +12175,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12954,8 +12195,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12976,7 +12215,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -12991,7 +12229,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13005,7 +12242,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13019,7 +12255,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13034,7 +12269,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13047,8 +12281,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13063,8 +12295,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13092,7 +12322,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13112,8 +12341,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13134,8 +12361,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13156,7 +12381,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13171,7 +12395,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13185,7 +12408,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13199,7 +12421,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13214,7 +12435,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13227,8 +12447,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13243,8 +12461,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13272,7 +12488,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13292,8 +12507,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13314,8 +12527,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13336,7 +12547,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13351,7 +12561,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13365,7 +12574,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13379,7 +12587,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13394,7 +12601,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13407,8 +12613,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13423,8 +12627,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13452,7 +12654,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13472,8 +12673,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13494,8 +12693,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13516,7 +12713,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13531,7 +12727,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13545,7 +12740,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13559,7 +12753,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13574,7 +12767,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13587,8 +12779,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13603,8 +12793,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13632,7 +12820,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13652,8 +12839,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13674,8 +12859,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13696,7 +12879,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13711,7 +12893,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13725,7 +12906,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13739,7 +12919,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13754,7 +12933,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13767,8 +12945,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13783,8 +12959,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13812,7 +12986,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13832,8 +13005,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13854,8 +13025,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13876,7 +13045,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13891,7 +13059,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13905,7 +13072,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13919,7 +13085,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13934,7 +13099,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13947,8 +13111,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13963,8 +13125,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -431,7 +431,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -446,8 +445,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -463,8 +460,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -480,7 +475,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_load_dword v2, v[0:1] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 @@ -494,7 +488,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -510,7 +503,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -526,7 +518,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -541,7 +532,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -555,8 +545,6 @@ ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -570,8 +558,6 @@ ; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -823,7 +809,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -836,8 +821,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -850,8 +833,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -863,7 +844,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -875,7 +855,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -887,7 +866,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -899,7 +877,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -911,7 +888,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -923,8 +899,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -936,8 +910,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -955,7 +927,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -968,8 +939,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -982,8 +951,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -995,7 +962,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1007,7 +973,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1019,7 +984,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1031,7 +995,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1043,7 +1006,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1055,8 +1017,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1068,8 +1028,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -1344,7 +1302,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1357,8 +1314,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1371,8 +1326,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1384,7 +1337,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1396,7 +1348,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1408,7 +1359,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1420,7 +1370,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1432,7 +1381,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1444,8 +1392,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1457,8 +1403,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -1476,7 +1420,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1491,8 +1434,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1509,8 +1450,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1526,7 +1465,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1539,7 +1477,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -1554,7 +1491,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -1569,7 +1505,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -1583,7 +1518,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -1597,8 +1531,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1614,8 +1546,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1637,7 +1567,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1652,8 +1581,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1670,8 +1597,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1687,7 +1612,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1700,7 +1624,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -1715,7 +1638,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -1730,7 +1652,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -1744,7 +1665,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -1758,8 +1678,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1775,8 +1693,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1948,7 +1864,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1964,8 +1879,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1982,8 +1895,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -1999,7 +1910,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -2013,7 +1923,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -2029,7 +1938,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -2045,7 +1953,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2060,7 +1967,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2075,8 +1981,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2092,8 +1996,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2116,7 +2018,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2132,8 +2033,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2150,8 +2049,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -2167,7 +2064,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -2181,7 +2077,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -2197,7 +2092,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -2213,7 +2107,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2228,7 +2121,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2243,8 +2135,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2260,8 +2150,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2541,7 +2429,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -2555,8 +2442,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -2570,8 +2455,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -2585,7 +2468,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2596,7 +2478,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2607,7 +2488,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -2618,7 +2498,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2629,7 +2508,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -2639,8 +2517,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_endpgm ; @@ -2650,8 +2526,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -2672,7 +2546,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2688,8 +2561,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2707,8 +2578,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2726,7 +2595,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2738,7 +2606,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -2752,7 +2619,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -2766,7 +2632,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2779,7 +2644,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2791,8 +2655,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2806,8 +2668,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2832,7 +2692,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2848,8 +2707,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2867,8 +2724,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2886,7 +2741,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2898,7 +2752,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -2912,7 +2765,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -2926,7 +2778,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2939,7 +2790,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2951,8 +2801,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2966,8 +2814,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3276,7 +3122,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3292,8 +3137,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3311,8 +3154,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3330,7 +3171,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3342,7 +3182,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -3356,7 +3195,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -3370,7 +3208,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3383,7 +3220,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3395,8 +3231,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3410,8 +3244,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3436,7 +3268,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3452,8 +3283,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3471,8 +3300,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3490,7 +3317,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3502,7 +3328,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -3516,7 +3341,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -3530,7 +3354,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3543,7 +3366,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3555,8 +3377,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3570,8 +3390,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3596,7 +3414,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3612,8 +3429,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3631,8 +3446,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3650,7 +3463,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3662,7 +3474,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -3676,7 +3487,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -3690,7 +3500,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3703,7 +3512,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3715,8 +3523,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3730,8 +3536,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3756,7 +3560,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3772,8 +3575,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3791,8 +3592,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3810,7 +3609,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3822,7 +3620,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -3836,7 +3633,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -3850,7 +3646,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3863,7 +3658,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3875,8 +3669,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3890,8 +3682,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3916,7 +3706,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3932,8 +3721,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3951,8 +3738,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3970,7 +3755,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3982,7 +3766,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -3996,7 +3779,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -4010,7 +3792,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4023,7 +3804,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4035,8 +3815,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4050,8 +3828,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4076,7 +3852,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4092,8 +3867,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4111,8 +3884,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4130,7 +3901,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4142,7 +3912,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -4156,7 +3925,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -4170,7 +3938,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4183,7 +3950,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4195,8 +3961,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4210,8 +3974,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4236,7 +3998,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4252,8 +4013,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4271,8 +4030,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4290,7 +4047,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4302,7 +4058,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -4316,7 +4071,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -4330,7 +4084,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4343,7 +4096,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4355,8 +4107,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4370,8 +4120,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4396,7 +4144,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4412,8 +4159,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4431,8 +4176,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4450,7 +4193,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4462,7 +4204,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -4476,7 +4217,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -4490,7 +4230,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4503,7 +4242,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4515,8 +4253,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4530,8 +4266,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -4857,7 +4591,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -4875,8 +4608,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 @@ -4894,8 +4625,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -4913,7 +4642,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4928,7 +4656,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4941,7 +4668,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4954,7 +4680,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4967,7 +4692,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4979,8 +4703,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -4992,8 +4714,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -5018,7 +4738,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5037,8 +4756,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5058,8 +4775,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5079,7 +4794,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5094,7 +4808,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -5109,7 +4822,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -5124,7 +4836,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5138,7 +4849,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5151,8 +4861,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5166,8 +4874,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5194,7 +4900,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5213,8 +4918,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5234,9 +4937,7 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc +; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv ; GFX10-CU-NEXT: buffer_gl1_inv @@ -5255,7 +4956,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5270,7 +4970,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -5285,7 +4984,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -5300,7 +4998,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5314,7 +5011,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5327,8 +5023,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5342,8 +5036,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5686,7 +5378,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5705,8 +5396,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5726,8 +5415,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5747,7 +5434,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5762,7 +5448,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -5777,7 +5462,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -5792,7 +5476,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5806,7 +5489,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5819,8 +5501,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5834,8 +5514,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5862,7 +5540,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5881,8 +5558,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5902,8 +5577,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5923,7 +5596,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5938,7 +5610,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -5953,7 +5624,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -5968,7 +5638,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5982,7 +5651,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5995,8 +5663,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6010,8 +5676,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6038,7 +5702,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6057,8 +5720,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6078,8 +5739,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6099,7 +5758,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6114,7 +5772,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -6129,7 +5786,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -6144,7 +5800,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6158,7 +5813,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6171,8 +5825,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6186,8 +5838,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6214,7 +5864,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6233,8 +5882,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6254,8 +5901,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6275,7 +5920,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6290,7 +5934,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -6305,7 +5948,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -6320,7 +5962,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6334,7 +5975,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6347,8 +5987,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6362,8 +6000,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6390,7 +6026,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6409,8 +6044,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6430,8 +6063,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6451,7 +6082,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6466,7 +6096,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -6481,7 +6110,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -6496,7 +6124,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6510,7 +6137,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6523,8 +6149,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6538,8 +6162,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6566,7 +6188,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6585,8 +6206,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6606,8 +6225,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6627,7 +6244,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6642,7 +6258,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -6657,7 +6272,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -6672,7 +6286,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6686,7 +6299,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6699,8 +6311,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6714,8 +6324,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6742,7 +6350,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6761,8 +6368,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6782,8 +6387,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6803,7 +6406,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6818,7 +6420,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -6833,7 +6434,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -6848,7 +6448,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6862,7 +6461,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6875,8 +6473,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6890,8 +6486,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6918,7 +6512,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6937,8 +6530,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6958,8 +6549,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6979,7 +6568,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -6994,7 +6582,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -7009,7 +6596,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -7024,7 +6610,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -7038,7 +6623,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -7051,8 +6635,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7066,8 +6648,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -7512,7 +7092,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -7528,8 +7107,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7546,8 +7123,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -7564,7 +7139,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_load_dword v2, v[0:1] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 @@ -7579,7 +7153,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -7596,7 +7169,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -7612,7 +7184,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -7628,7 +7199,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -7642,8 +7212,6 @@ ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7658,8 +7226,6 @@ ; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -7912,7 +7478,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -7925,8 +7490,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -7939,8 +7502,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -7952,7 +7513,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -7964,7 +7524,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -7976,7 +7535,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -7988,7 +7546,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8000,7 +7557,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8012,8 +7568,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -8025,8 +7579,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -8044,7 +7596,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -8057,8 +7608,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -8071,8 +7620,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -8084,7 +7631,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -8096,7 +7642,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8108,7 +7653,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -8120,7 +7664,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8132,7 +7675,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8144,8 +7686,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -8157,8 +7697,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -8429,7 +7967,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -8442,8 +7979,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -8456,8 +7991,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -8469,7 +8002,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -8481,7 +8013,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8493,7 +8024,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -8505,7 +8035,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8517,7 +8046,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8529,8 +8057,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -8542,8 +8068,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -8561,7 +8085,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8576,8 +8099,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8593,8 +8114,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8609,7 +8128,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -8622,7 +8140,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -8637,7 +8154,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -8652,7 +8168,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8666,7 +8181,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8680,8 +8194,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8696,8 +8208,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -8718,7 +8228,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8733,8 +8242,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8750,8 +8257,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8766,7 +8271,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -8779,7 +8283,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -8794,7 +8297,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -8809,7 +8311,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8823,7 +8324,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8837,8 +8337,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8853,8 +8351,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9032,7 +8528,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9049,8 +8544,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9068,8 +8561,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9086,7 +8577,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -9100,7 +8590,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -9117,7 +8606,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -9133,7 +8621,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9149,7 +8636,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9164,8 +8650,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9182,8 +8666,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9207,7 +8689,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9224,8 +8705,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9243,8 +8722,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9261,7 +8738,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -9275,7 +8751,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -9292,7 +8767,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -9308,7 +8782,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9324,7 +8797,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9339,8 +8811,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9357,8 +8827,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9635,7 +9103,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -9649,8 +9116,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -9664,8 +9129,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -9679,7 +9142,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -9690,7 +9152,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -9701,7 +9162,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -9712,7 +9172,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -9723,7 +9182,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -9733,8 +9191,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_endpgm ; @@ -9744,8 +9200,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -9766,7 +9220,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9782,8 +9235,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9800,8 +9251,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9818,7 +9267,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -9830,7 +9278,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -9844,7 +9291,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -9858,7 +9304,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9871,7 +9316,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9883,8 +9327,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9897,8 +9339,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9922,7 +9362,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9938,8 +9377,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9956,8 +9393,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9974,7 +9409,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -9986,7 +9420,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10000,7 +9433,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10014,7 +9446,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10027,7 +9458,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10039,8 +9469,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10053,8 +9481,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10354,7 +9780,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10370,8 +9795,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10388,8 +9811,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10406,7 +9827,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10418,7 +9838,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10432,7 +9851,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10446,7 +9864,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10459,7 +9876,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10471,8 +9887,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10485,8 +9899,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10510,7 +9922,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10526,8 +9937,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10544,8 +9953,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10562,7 +9969,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10574,7 +9980,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10588,7 +9993,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10602,7 +10006,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10615,7 +10018,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10627,8 +10029,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10641,8 +10041,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10666,7 +10064,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10682,8 +10079,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10700,8 +10095,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10718,7 +10111,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10730,7 +10122,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10744,7 +10135,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10758,7 +10148,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10771,7 +10160,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10783,8 +10171,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10797,8 +10183,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10822,7 +10206,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10838,8 +10221,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10856,8 +10237,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10874,7 +10253,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10886,7 +10264,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10900,7 +10277,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10914,7 +10290,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10927,7 +10302,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10939,8 +10313,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10953,8 +10325,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10978,7 +10348,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10994,8 +10363,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11012,8 +10379,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11030,7 +10395,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11042,7 +10406,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -11056,7 +10419,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -11070,7 +10432,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11083,7 +10444,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11095,8 +10455,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11109,8 +10467,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11134,7 +10490,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11150,8 +10505,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11168,8 +10521,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11186,7 +10537,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11198,7 +10548,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -11212,7 +10561,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -11226,7 +10574,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11239,7 +10586,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11251,8 +10597,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11265,8 +10609,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11290,7 +10632,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11306,8 +10647,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11324,8 +10663,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11342,7 +10679,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11354,7 +10690,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -11368,7 +10703,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -11382,7 +10716,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11395,7 +10728,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11407,8 +10739,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11421,8 +10751,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11446,7 +10774,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11462,8 +10789,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11480,8 +10805,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11498,7 +10821,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11510,7 +10832,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -11524,7 +10845,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -11538,7 +10858,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11551,7 +10870,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11563,8 +10881,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11577,8 +10893,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11911,7 +11225,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -11929,8 +11242,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 @@ -11948,8 +11259,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -11967,7 +11276,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -11982,7 +11290,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -11995,7 +11302,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -12008,7 +11314,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -12021,7 +11326,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -12033,8 +11337,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -12046,8 +11348,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -12072,7 +11372,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12092,8 +11391,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12114,8 +11411,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12136,7 +11431,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -12152,7 +11446,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -12168,7 +11461,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -12183,7 +11475,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12198,7 +11489,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12211,8 +11501,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12227,8 +11515,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12256,7 +11542,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12276,8 +11561,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12298,8 +11581,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12320,7 +11601,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -12336,7 +11616,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -12352,7 +11631,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -12367,7 +11645,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12382,7 +11659,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12395,8 +11671,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12411,8 +11685,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12772,7 +12044,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12792,8 +12063,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12814,8 +12083,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12836,7 +12103,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -12852,7 +12118,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -12868,7 +12133,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -12883,7 +12147,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12898,7 +12161,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12911,8 +12173,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12927,8 +12187,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12956,7 +12214,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12976,8 +12233,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12998,8 +12253,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13020,7 +12273,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13036,7 +12288,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13052,7 +12303,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13067,7 +12317,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13082,7 +12331,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13095,8 +12343,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13111,8 +12357,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13140,7 +12384,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13160,8 +12403,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13182,8 +12423,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13204,7 +12443,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13220,7 +12458,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13236,7 +12473,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13251,7 +12487,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13266,7 +12501,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13279,8 +12513,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13295,8 +12527,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13324,7 +12554,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13344,8 +12573,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13366,8 +12593,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13388,7 +12613,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13404,7 +12628,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13420,7 +12643,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13435,7 +12657,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13450,7 +12671,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13463,8 +12683,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13479,8 +12697,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13508,7 +12724,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13528,8 +12743,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13550,8 +12763,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13572,7 +12783,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13588,7 +12798,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13604,7 +12813,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13619,7 +12827,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13634,7 +12841,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13647,8 +12853,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13663,8 +12867,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13692,7 +12894,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13712,8 +12913,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13734,8 +12933,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13756,7 +12953,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13772,7 +12968,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13788,7 +12983,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13803,7 +12997,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13818,7 +13011,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13831,8 +13023,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13847,8 +13037,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13876,7 +13064,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13896,8 +13083,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13918,8 +13103,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13940,7 +13123,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -13956,7 +13138,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13972,7 +13153,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13987,7 +13167,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -14002,7 +13181,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -14015,8 +13193,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -14031,8 +13207,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -14060,7 +13234,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -14080,8 +13253,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -14102,8 +13273,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -14124,7 +13293,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -14140,7 +13308,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -14156,7 +13323,6 @@ ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -14171,7 +13337,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -14186,7 +13351,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -14199,8 +13363,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -14215,8 +13377,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll @@ -478,7 +478,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -491,8 +490,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -505,7 +502,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -517,7 +513,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -529,8 +524,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -542,7 +535,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll @@ -426,7 +426,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 @@ -441,8 +440,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -457,7 +454,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 @@ -472,7 +468,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_load_dword v2, v[0:1] ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 @@ -487,7 +482,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 @@ -502,7 +496,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -517,7 +510,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 @@ -532,7 +524,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -546,8 +537,6 @@ ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -560,7 +549,6 @@ ; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 @@ -811,7 +799,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -824,8 +811,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -838,7 +823,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -850,7 +834,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -861,7 +844,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -872,7 +854,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -883,7 +864,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -894,7 +874,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -906,8 +885,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -919,7 +896,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -937,7 +913,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -950,8 +925,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -964,7 +937,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -976,7 +948,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -987,7 +958,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -998,7 +968,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1009,7 +978,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1020,7 +988,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1032,8 +999,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1045,7 +1010,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { @@ -1307,7 +1271,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1320,8 +1283,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1334,7 +1295,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_endpgm ; @@ -1346,7 +1306,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1357,7 +1316,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1368,7 +1326,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1379,7 +1336,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1390,7 +1346,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1402,8 +1357,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1415,7 +1368,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { @@ -1433,7 +1385,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -1447,8 +1398,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1464,7 +1413,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -1477,7 +1425,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1489,7 +1436,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1501,7 +1447,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1514,7 +1459,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -1526,7 +1470,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -1540,8 +1483,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1556,7 +1497,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -1575,7 +1515,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -1589,8 +1528,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1606,7 +1543,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -1619,7 +1555,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v[0:1], v2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1631,7 +1566,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1643,7 +1577,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1656,7 +1589,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -1668,7 +1600,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -1682,8 +1613,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1698,7 +1627,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -1856,7 +1784,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 @@ -1871,8 +1798,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1888,7 +1813,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -1902,7 +1826,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -1915,7 +1838,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -1928,7 +1850,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1942,7 +1863,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -1955,7 +1875,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -1970,8 +1889,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1986,7 +1903,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -2007,7 +1923,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 @@ -2022,8 +1937,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2039,7 +1952,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 @@ -2053,7 +1965,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2 @@ -2066,7 +1977,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -2079,7 +1989,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2093,7 +2002,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -2106,7 +2014,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -2121,8 +2028,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2137,7 +2042,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -2402,7 +2306,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -2416,8 +2319,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -2431,7 +2332,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_endpgm ; @@ -2445,7 +2345,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2455,7 +2354,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2465,7 +2363,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -2475,7 +2372,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2485,7 +2381,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -2495,8 +2390,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_endpgm ; @@ -2506,7 +2399,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { @@ -2527,7 +2419,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -2542,8 +2433,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2560,7 +2449,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2575,7 +2463,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2586,7 +2473,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2597,7 +2483,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2609,7 +2494,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -2620,7 +2504,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -2632,8 +2515,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2646,7 +2527,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2668,7 +2548,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -2683,8 +2562,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2701,7 +2578,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2716,7 +2592,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2727,7 +2602,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2738,7 +2612,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2750,7 +2623,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -2761,7 +2633,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -2773,8 +2644,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2787,7 +2656,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3067,7 +2935,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3082,8 +2949,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3100,7 +2965,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3115,7 +2979,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3126,7 +2989,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3137,7 +2999,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3149,7 +3010,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3160,7 +3020,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -3172,8 +3031,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3186,7 +3043,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3208,7 +3064,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3223,8 +3078,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3241,7 +3094,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3256,7 +3108,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3267,7 +3118,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3278,7 +3128,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3290,7 +3139,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3301,7 +3149,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -3313,8 +3160,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3327,7 +3172,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3349,7 +3193,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3364,8 +3207,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3382,7 +3223,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3397,7 +3237,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3408,7 +3247,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3419,7 +3257,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3431,7 +3268,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3442,7 +3278,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -3454,8 +3289,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3468,7 +3301,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3490,7 +3322,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3505,8 +3336,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3523,7 +3352,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3538,7 +3366,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3549,7 +3376,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3560,7 +3386,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3572,7 +3397,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3583,7 +3407,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -3595,8 +3418,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -3609,7 +3430,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3924,7 +3744,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -3942,8 +3761,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 @@ -3961,7 +3778,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 @@ -3979,7 +3795,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -3993,7 +3808,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4005,7 +3819,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4017,7 +3830,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4029,7 +3841,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4041,8 +3852,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -4054,7 +3863,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -4079,7 +3887,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -4098,8 +3905,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4118,7 +3923,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -4137,7 +3941,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4152,7 +3955,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4164,7 +3966,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4177,7 +3978,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4189,7 +3989,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -4202,8 +4001,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4216,7 +4013,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -4241,7 +4037,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -4260,8 +4055,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4280,7 +4073,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -4299,7 +4091,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4314,7 +4105,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4326,7 +4116,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4339,7 +4128,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4351,7 +4139,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -4364,8 +4151,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4378,7 +4163,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -4703,7 +4487,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -4722,8 +4505,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4742,7 +4523,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -4761,7 +4541,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4776,7 +4555,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4788,7 +4566,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4801,7 +4578,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4813,7 +4589,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -4826,8 +4601,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4840,7 +4613,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -4865,7 +4637,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -4884,8 +4655,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4904,7 +4673,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -4923,7 +4691,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4938,7 +4705,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4950,7 +4716,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4963,7 +4728,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -4975,7 +4739,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -4988,8 +4751,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5002,7 +4763,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -5027,7 +4787,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -5046,8 +4805,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5066,7 +4823,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -5085,7 +4841,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5100,7 +4855,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -5112,7 +4866,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5125,7 +4878,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -5137,7 +4889,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5150,8 +4901,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5164,7 +4913,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -5189,7 +4937,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -5208,8 +4955,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5228,7 +4973,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -5247,7 +4991,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5262,7 +5005,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -5274,7 +5016,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5287,7 +5028,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -5299,7 +5039,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5312,8 +5051,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5326,7 +5063,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -5351,7 +5087,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -5370,8 +5105,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5390,7 +5123,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -5409,7 +5141,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5424,7 +5155,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -5436,7 +5166,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5449,7 +5178,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -5461,7 +5189,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5474,8 +5201,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5488,7 +5213,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -5513,7 +5237,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -5532,8 +5255,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5552,7 +5273,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -5571,7 +5291,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5586,7 +5305,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -5598,7 +5316,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5611,7 +5328,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -5623,7 +5339,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5636,8 +5351,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5650,7 +5363,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -5675,7 +5387,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -5694,8 +5405,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5714,7 +5423,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -5733,7 +5441,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5748,7 +5455,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -5760,7 +5466,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5773,7 +5478,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -5785,7 +5489,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5798,8 +5501,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5812,7 +5513,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -5837,7 +5537,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -5856,8 +5555,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5876,7 +5573,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -5895,7 +5591,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s5 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v3, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5910,7 +5605,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -5922,7 +5616,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5935,7 +5628,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -5947,7 +5639,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5960,8 +5651,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5974,7 +5663,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 @@ -6413,8 +6101,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6469,7 +6155,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6497,7 +6182,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -6511,8 +6195,6 @@ ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6787,8 +6469,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -6832,7 +6512,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -6853,7 +6532,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -6865,8 +6543,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -6907,8 +6583,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -6952,7 +6626,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -6973,7 +6646,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -6985,8 +6657,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -7263,8 +6933,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_endpgm ; @@ -7308,7 +6976,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -7329,7 +6996,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -7341,8 +7007,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_endpgm ; @@ -7383,8 +7047,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7430,7 +7092,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7453,7 +7114,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -7467,8 +7127,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7511,8 +7169,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7558,7 +7214,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7581,7 +7236,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -7595,8 +7249,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7782,8 +7434,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7837,7 +7487,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7863,7 +7512,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -7878,8 +7526,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7929,8 +7575,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7984,7 +7628,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8010,7 +7653,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -8025,8 +7667,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8311,8 +7951,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8357,7 +7995,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -8376,7 +8013,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8386,8 +8022,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_endpgm ; @@ -8430,8 +8064,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8478,7 +8110,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8499,7 +8130,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -8511,8 +8141,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8557,8 +8185,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8605,7 +8231,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8626,7 +8251,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -8638,8 +8262,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8926,8 +8548,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8974,7 +8594,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8995,7 +8614,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9007,8 +8625,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9053,8 +8669,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9101,7 +8715,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9122,7 +8735,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9134,8 +8746,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9180,8 +8790,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9228,7 +8836,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9249,7 +8856,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9261,8 +8867,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9307,8 +8911,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9355,7 +8957,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9376,7 +8977,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9388,8 +8988,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9434,8 +9032,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9482,7 +9078,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9503,7 +9098,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9515,8 +9109,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9561,8 +9153,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9609,7 +9199,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9630,7 +9219,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9642,8 +9230,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9688,8 +9274,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9736,7 +9320,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9757,7 +9340,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9769,8 +9351,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9815,8 +9395,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9863,7 +9441,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9884,7 +9461,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9896,8 +9472,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10238,8 +9812,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 @@ -10298,7 +9870,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -10321,7 +9892,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 @@ -10333,8 +9903,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 @@ -10387,8 +9955,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10449,7 +10015,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10473,7 +10038,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -10486,8 +10050,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10542,8 +10104,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10604,7 +10164,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10628,7 +10187,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -10641,8 +10199,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10995,8 +10551,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11057,7 +10611,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11081,7 +10634,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -11094,8 +10646,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11150,8 +10700,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11212,7 +10760,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11236,7 +10783,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -11249,8 +10795,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11305,8 +10849,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11367,7 +10909,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11391,7 +10932,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -11404,8 +10944,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11460,8 +10998,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11522,7 +11058,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11546,7 +11081,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -11559,8 +11093,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11615,8 +11147,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11677,7 +11207,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11701,7 +11230,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -11714,8 +11242,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11770,8 +11296,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11832,7 +11356,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11856,7 +11379,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -11869,8 +11391,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11925,8 +11445,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11987,7 +11505,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12011,7 +11528,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -12024,8 +11540,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12080,8 +11594,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-WGP-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12142,7 +11654,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12166,7 +11677,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -12179,8 +11689,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll @@ -436,7 +436,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -451,7 +450,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -464,8 +462,7 @@ ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -477,8 +474,7 @@ ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -494,7 +490,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s2 @@ -506,7 +501,7 @@ ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -517,7 +512,7 @@ ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -528,7 +523,7 @@ ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -539,7 +534,7 @@ ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -550,8 +545,7 @@ ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -565,8 +559,7 @@ ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -841,7 +834,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -853,7 +845,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -865,8 +856,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -878,8 +867,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -891,7 +878,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -902,7 +888,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -913,7 +898,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -925,7 +909,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -937,7 +920,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -948,8 +930,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -962,8 +942,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -983,7 +961,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -995,7 +972,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1007,8 +983,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -1020,8 +994,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1033,7 +1005,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1044,7 +1015,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1055,7 +1025,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1067,7 +1036,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1079,7 +1047,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1090,8 +1057,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1104,8 +1069,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1396,7 +1359,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -1408,7 +1370,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1420,8 +1381,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -1433,8 +1392,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1446,7 +1403,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1457,7 +1413,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1468,7 +1423,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1480,7 +1434,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1492,7 +1445,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1503,8 +1455,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1517,8 +1467,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1538,7 +1486,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1552,7 +1499,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1566,8 +1512,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1582,8 +1526,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -1598,7 +1540,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1610,7 +1551,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1623,7 +1563,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1637,7 +1576,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -1651,7 +1589,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -1664,8 +1601,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1679,8 +1614,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -1701,7 +1634,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1715,7 +1647,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1729,8 +1660,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1745,8 +1674,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -1761,7 +1688,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1773,7 +1699,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1786,7 +1711,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1800,7 +1724,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -1814,7 +1737,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -1827,8 +1749,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1842,8 +1762,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2026,7 +1944,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -2041,7 +1958,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2056,8 +1972,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2073,8 +1987,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -2090,7 +2002,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2103,7 +2014,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2117,7 +2027,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2132,7 +2041,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -2147,7 +2055,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -2161,8 +2068,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2179,8 +2084,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2205,7 +2108,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -2220,7 +2122,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2235,8 +2136,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2252,8 +2151,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -2269,7 +2166,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2282,7 +2178,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2296,7 +2191,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2311,7 +2205,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -2326,7 +2219,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -2340,8 +2232,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2358,8 +2248,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2655,7 +2543,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -2669,7 +2556,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -2680,8 +2566,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_endpgm ; @@ -2692,8 +2576,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -2707,7 +2589,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2717,7 +2598,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2727,7 +2607,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -2738,7 +2617,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2749,7 +2627,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -2759,8 +2636,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2772,8 +2647,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2796,7 +2669,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -2812,7 +2684,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2825,8 +2696,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2840,8 +2709,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -2858,7 +2725,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2869,7 +2735,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2881,7 +2746,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2894,7 +2758,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -2907,7 +2770,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -2919,8 +2781,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2933,8 +2793,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2958,7 +2816,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -2974,7 +2831,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2987,8 +2843,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3002,8 +2856,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -3020,7 +2872,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3031,7 +2882,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3043,7 +2893,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3056,7 +2905,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -3069,7 +2917,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -3081,8 +2928,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3095,8 +2940,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -3410,7 +3253,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -3426,7 +3268,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3439,8 +3280,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3454,8 +3293,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -3472,7 +3309,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3483,7 +3319,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3495,7 +3330,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3508,7 +3342,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -3521,7 +3354,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -3533,8 +3365,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3547,8 +3377,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -3572,7 +3400,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -3588,7 +3415,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3601,8 +3427,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3616,8 +3440,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -3634,7 +3456,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3645,7 +3466,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3657,7 +3477,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3670,7 +3489,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -3683,7 +3501,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -3695,8 +3512,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3709,8 +3524,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -3734,7 +3547,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -3750,7 +3562,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3763,8 +3574,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3778,8 +3587,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -3796,7 +3603,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3807,7 +3613,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3819,7 +3624,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3832,7 +3636,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -3845,7 +3648,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -3857,8 +3659,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3871,8 +3671,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -3896,7 +3694,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -3912,7 +3709,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3925,8 +3721,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3940,8 +3734,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -3958,7 +3750,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3969,7 +3760,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3981,7 +3771,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3994,7 +3783,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -4007,7 +3795,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -4019,8 +3806,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4033,8 +3818,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -4058,7 +3841,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -4074,7 +3856,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4087,8 +3868,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4102,8 +3881,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -4120,7 +3897,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4131,7 +3907,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4143,7 +3918,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4156,7 +3930,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -4169,7 +3942,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -4181,8 +3953,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4195,8 +3965,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -4220,7 +3988,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -4236,7 +4003,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4249,8 +4015,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4264,8 +4028,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -4282,7 +4044,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4293,7 +4054,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4305,7 +4065,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4318,7 +4077,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -4331,7 +4089,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -4343,8 +4100,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4357,8 +4112,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -4382,7 +4135,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -4398,7 +4150,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4411,8 +4162,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4426,8 +4175,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -4444,7 +4191,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4455,7 +4201,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4467,7 +4212,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4480,7 +4224,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -4493,7 +4236,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -4505,8 +4247,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4519,8 +4259,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -4544,7 +4282,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -4560,7 +4297,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4573,8 +4309,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4588,8 +4322,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -4606,7 +4338,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4617,7 +4348,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4629,7 +4359,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4642,7 +4371,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -4655,7 +4383,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -4667,8 +4394,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4681,8 +4406,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5020,7 +4743,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5036,7 +4758,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -5051,8 +4772,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] @@ -5065,8 +4784,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -5082,7 +4799,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5094,7 +4810,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -5106,7 +4821,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -5119,7 +4833,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -5132,7 +4845,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -5144,8 +4856,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] @@ -5159,8 +4869,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -5187,7 +4895,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -5204,7 +4911,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5220,8 +4926,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5236,8 +4940,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5255,7 +4957,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5267,7 +4968,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5280,7 +4980,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5294,7 +4993,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -5308,7 +5006,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -5321,8 +5018,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5338,8 +5033,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5368,7 +5061,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -5385,7 +5077,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5401,8 +5092,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5417,8 +5106,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5436,7 +5123,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5448,7 +5134,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5461,7 +5146,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5475,7 +5159,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -5489,7 +5172,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -5502,8 +5184,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5519,8 +5199,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5877,7 +5555,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -5894,7 +5571,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5910,8 +5586,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5926,8 +5600,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5945,7 +5617,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5957,7 +5628,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5970,7 +5640,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5984,7 +5653,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -5998,7 +5666,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6011,8 +5678,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6028,8 +5693,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6058,7 +5721,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -6075,7 +5737,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6091,8 +5752,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6107,8 +5766,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6126,7 +5783,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6138,7 +5794,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6151,7 +5806,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6165,7 +5819,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6179,7 +5832,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6192,8 +5844,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6209,8 +5859,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6239,7 +5887,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -6256,7 +5903,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6272,8 +5918,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6288,8 +5932,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6307,7 +5949,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6319,7 +5960,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6332,7 +5972,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6346,7 +5985,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6360,7 +5998,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6373,8 +6010,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6390,8 +6025,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6420,7 +6053,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -6437,7 +6069,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6453,8 +6084,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6469,8 +6098,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6488,7 +6115,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6500,7 +6126,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6513,7 +6138,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6527,7 +6151,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6541,7 +6164,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6554,8 +6176,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6571,8 +6191,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6601,7 +6219,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -6618,7 +6235,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6634,8 +6250,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6650,8 +6264,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6669,7 +6281,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6681,7 +6292,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6694,7 +6304,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6708,7 +6317,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6722,7 +6330,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6735,8 +6342,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6752,8 +6357,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6782,7 +6385,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -6799,7 +6401,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6815,8 +6416,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6831,8 +6430,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6850,7 +6447,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6862,7 +6458,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6875,7 +6470,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6889,7 +6483,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -6903,7 +6496,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -6916,8 +6508,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6933,8 +6523,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6963,7 +6551,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -6980,7 +6567,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6996,8 +6582,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7012,8 +6596,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -7031,7 +6613,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -7043,7 +6624,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7056,7 +6636,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7070,7 +6649,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -7084,7 +6662,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -7097,8 +6674,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7114,8 +6689,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -7144,7 +6717,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -7161,7 +6733,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -7177,8 +6748,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7193,8 +6762,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -7212,7 +6779,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -7224,7 +6790,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7237,7 +6802,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7251,7 +6815,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -7265,7 +6828,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -7278,8 +6840,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7295,8 +6855,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -7739,7 +7297,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -7754,7 +7311,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -7767,8 +7323,7 @@ ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7780,8 +7335,7 @@ ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -7797,7 +7351,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s2 @@ -7809,7 +7362,7 @@ ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7820,7 +7373,7 @@ ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7831,7 +7384,7 @@ ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -7842,7 +7395,7 @@ ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -7853,8 +7406,7 @@ ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7868,8 +7420,7 @@ ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -8144,7 +7695,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -8156,7 +7706,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -8168,8 +7717,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8181,8 +7728,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -8194,7 +7739,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -8205,7 +7749,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8216,7 +7759,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -8228,7 +7770,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8240,7 +7781,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8251,8 +7791,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8265,8 +7803,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8286,7 +7822,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -8298,7 +7833,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -8310,8 +7844,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8323,8 +7855,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -8336,7 +7866,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -8347,7 +7876,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8358,7 +7886,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -8370,7 +7897,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8382,7 +7908,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8393,8 +7918,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8407,8 +7930,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8699,7 +8220,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -8711,7 +8231,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -8723,8 +8242,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8736,8 +8253,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -8749,7 +8264,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -8760,7 +8274,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8771,7 +8284,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -8783,7 +8295,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8795,7 +8306,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8806,8 +8316,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8820,8 +8328,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8841,7 +8347,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -8855,7 +8360,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8869,8 +8373,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8885,8 +8387,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8901,7 +8401,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -8913,7 +8412,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8926,7 +8424,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8940,7 +8437,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -8954,7 +8450,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -8967,8 +8462,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8982,8 +8475,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9004,7 +8495,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -9018,7 +8508,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9032,8 +8521,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9048,8 +8535,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9064,7 +8549,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -9076,7 +8560,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9089,7 +8572,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9103,7 +8585,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -9117,7 +8598,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -9130,8 +8610,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9145,8 +8623,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9329,7 +8805,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -9344,7 +8819,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9359,8 +8833,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9376,8 +8848,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9393,7 +8863,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -9406,7 +8875,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9420,7 +8888,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9435,7 +8902,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -9450,7 +8916,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -9464,8 +8929,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9482,8 +8945,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9508,7 +8969,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -9523,7 +8983,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9538,8 +8997,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9555,8 +9012,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9572,7 +9027,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -9585,7 +9039,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9599,7 +9052,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9614,7 +9066,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -9629,7 +9080,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -9643,8 +9093,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9661,8 +9109,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9958,7 +9404,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -9972,7 +9417,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -9983,8 +9427,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_endpgm ; @@ -9995,8 +9437,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -10010,7 +9450,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -10020,7 +9459,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -10030,7 +9468,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -10041,7 +9478,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -10052,7 +9488,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -10062,8 +9497,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -10075,8 +9508,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -10099,7 +9530,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10115,7 +9545,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10128,8 +9557,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10143,8 +9570,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10161,7 +9586,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10172,7 +9596,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10184,7 +9607,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10197,7 +9619,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -10210,7 +9631,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -10222,8 +9642,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10236,8 +9654,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10261,7 +9677,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10277,7 +9692,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10290,8 +9704,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10305,8 +9717,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10323,7 +9733,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10334,7 +9743,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10346,7 +9754,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10359,7 +9766,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -10372,7 +9778,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -10384,8 +9789,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10398,8 +9801,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10713,7 +10114,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10729,7 +10129,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10742,8 +10141,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10757,8 +10154,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10775,7 +10170,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10786,7 +10180,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10798,7 +10191,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10811,7 +10203,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -10824,7 +10215,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -10836,8 +10226,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10850,8 +10238,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10875,7 +10261,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10891,7 +10276,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10904,8 +10288,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10919,8 +10301,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10937,7 +10317,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10948,7 +10327,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10960,7 +10338,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10973,7 +10350,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -10986,7 +10362,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -10998,8 +10373,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11012,8 +10385,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11037,7 +10408,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -11053,7 +10423,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11066,8 +10435,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11081,8 +10448,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11099,7 +10464,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11110,7 +10474,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11122,7 +10485,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11135,7 +10497,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11148,7 +10509,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11160,8 +10520,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11174,8 +10532,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11199,7 +10555,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -11215,7 +10570,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11228,8 +10582,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11243,8 +10595,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11261,7 +10611,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11272,7 +10621,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11284,7 +10632,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11297,7 +10644,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11310,7 +10656,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11322,8 +10667,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11336,8 +10679,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11361,7 +10702,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -11377,7 +10717,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11390,8 +10729,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11405,8 +10742,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11423,7 +10758,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11434,7 +10768,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11446,7 +10779,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11459,7 +10791,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11472,7 +10803,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11484,8 +10814,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11498,8 +10826,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11523,7 +10849,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -11539,7 +10864,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11552,8 +10876,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11567,8 +10889,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11585,7 +10905,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11596,7 +10915,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11608,7 +10926,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11621,7 +10938,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11634,7 +10950,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11646,8 +10961,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11660,8 +10973,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11685,7 +10996,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -11701,7 +11011,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11714,8 +11023,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11729,8 +11036,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11747,7 +11052,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11758,7 +11062,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11770,7 +11073,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11783,7 +11085,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11796,7 +11097,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11808,8 +11108,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11822,8 +11120,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11847,7 +11143,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -11863,7 +11158,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11876,8 +11170,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11891,8 +11183,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11909,7 +11199,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11920,7 +11209,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11932,7 +11220,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11945,7 +11232,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -11958,7 +11244,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -11970,8 +11255,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11984,8 +11267,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12323,7 +11604,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -12340,7 +11620,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12356,8 +11635,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12372,8 +11649,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12391,7 +11666,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -12403,7 +11677,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12416,7 +11689,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12430,7 +11702,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -12444,7 +11715,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -12457,8 +11727,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12474,8 +11742,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12504,7 +11770,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -12521,7 +11786,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12537,8 +11801,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12553,8 +11815,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12572,7 +11832,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -12584,7 +11843,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12597,7 +11855,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12611,7 +11868,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -12625,7 +11881,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -12638,8 +11893,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12655,8 +11908,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13013,7 +12264,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13030,7 +12280,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13046,8 +12295,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13062,8 +12309,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13081,7 +12326,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13093,7 +12337,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13106,7 +12349,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13120,7 +12362,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13134,7 +12375,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13147,8 +12387,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13164,8 +12402,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13194,7 +12430,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13211,7 +12446,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13227,8 +12461,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13243,8 +12475,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13262,7 +12492,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13274,7 +12503,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13287,7 +12515,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13301,7 +12528,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13315,7 +12541,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13328,8 +12553,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13345,8 +12568,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13375,7 +12596,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13392,7 +12612,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13408,8 +12627,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13424,8 +12641,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13443,7 +12658,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13455,7 +12669,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13468,7 +12681,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13482,7 +12694,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13496,7 +12707,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13509,8 +12719,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13526,8 +12734,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13556,7 +12762,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13573,7 +12778,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13589,8 +12793,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13605,8 +12807,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13624,7 +12824,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13636,7 +12835,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13649,7 +12847,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13663,7 +12860,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13677,7 +12873,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13690,8 +12885,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13707,8 +12900,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13737,7 +12928,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13754,7 +12944,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13770,8 +12959,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13786,8 +12973,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13805,7 +12990,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13817,7 +13001,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13830,7 +13013,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13844,7 +13026,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -13858,7 +13039,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -13871,8 +13051,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13888,8 +13066,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13918,7 +13094,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13935,7 +13110,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13951,8 +13125,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13967,8 +13139,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13986,7 +13156,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13998,7 +13167,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -14011,7 +13179,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -14025,7 +13192,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -14039,7 +13205,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -14052,8 +13217,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -14069,8 +13232,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -14099,7 +13260,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -14116,7 +13276,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -14132,8 +13291,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -14148,8 +13305,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -14167,7 +13322,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -14179,7 +13333,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -14192,7 +13345,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -14206,7 +13358,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -14220,7 +13371,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -14233,8 +13383,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -14250,8 +13398,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -14280,7 +13426,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -14297,7 +13442,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -14313,8 +13457,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -14329,8 +13471,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -14348,7 +13488,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -14360,7 +13499,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol @@ -14373,7 +13511,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -14387,7 +13524,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 @@ -14401,7 +13537,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 @@ -14414,8 +13549,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -14431,8 +13564,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll @@ -438,7 +438,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -453,7 +452,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -466,8 +464,7 @@ ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -479,8 +476,7 @@ ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -496,7 +492,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s2 @@ -508,7 +503,7 @@ ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -520,7 +515,7 @@ ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -532,7 +527,7 @@ ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -543,7 +538,7 @@ ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -554,8 +549,7 @@ ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -569,8 +563,7 @@ ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -845,7 +838,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -857,7 +849,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -869,8 +860,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -882,8 +871,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -895,7 +882,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -907,7 +893,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -919,7 +904,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -931,7 +915,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -943,7 +926,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -954,8 +936,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -968,8 +948,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -989,7 +967,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -1001,7 +978,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1013,8 +989,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -1026,8 +1000,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1039,7 +1011,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1051,7 +1022,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1063,7 +1033,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1075,7 +1044,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1087,7 +1055,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1098,8 +1065,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1112,8 +1077,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1406,7 +1369,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -1418,7 +1380,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1430,8 +1391,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -1443,8 +1402,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1456,7 +1413,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1468,7 +1424,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1480,7 +1435,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1492,7 +1446,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1504,7 +1457,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1515,8 +1467,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1529,8 +1479,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1550,7 +1498,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1564,7 +1511,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1578,8 +1524,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1594,8 +1538,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -1610,7 +1552,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1623,7 +1564,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -1638,7 +1578,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -1653,7 +1592,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -1667,7 +1605,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -1680,8 +1617,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1695,8 +1630,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -1717,7 +1650,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -1731,7 +1663,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -1745,8 +1676,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1761,8 +1690,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -1777,7 +1704,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1790,7 +1716,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -1805,7 +1730,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -1820,7 +1744,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -1834,7 +1757,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -1847,8 +1769,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1862,8 +1782,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2048,7 +1966,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -2063,7 +1980,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2078,8 +1994,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2095,8 +2009,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -2112,7 +2024,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2126,7 +2037,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -2142,7 +2052,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -2158,7 +2067,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2173,7 +2081,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2187,8 +2094,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2205,8 +2110,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2231,7 +2134,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -2246,7 +2148,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2261,8 +2162,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2278,8 +2177,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -2295,7 +2192,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2309,7 +2205,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -2325,7 +2220,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -2341,7 +2235,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2356,7 +2249,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2370,8 +2262,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2388,8 +2278,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2687,7 +2575,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -2701,7 +2588,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -2712,8 +2598,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_endpgm ; @@ -2724,8 +2608,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -2739,7 +2621,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2750,7 +2631,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2761,7 +2641,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -2772,7 +2651,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2783,7 +2661,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -2793,8 +2670,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2806,8 +2681,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2830,7 +2703,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -2846,7 +2718,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -2859,8 +2730,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2874,8 +2743,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -2892,7 +2759,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2904,7 +2770,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -2918,7 +2783,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -2932,7 +2796,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2945,7 +2808,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -2957,8 +2819,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2971,8 +2831,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -2996,7 +2854,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -3012,7 +2869,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3025,8 +2881,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3040,8 +2894,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -3058,7 +2910,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3070,7 +2921,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -3084,7 +2934,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -3098,7 +2947,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3111,7 +2959,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3123,8 +2970,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3137,8 +2982,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -3456,7 +3299,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -3472,7 +3314,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3485,8 +3326,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3500,8 +3339,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -3518,7 +3355,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3530,7 +3366,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -3544,7 +3379,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -3558,7 +3392,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3571,7 +3404,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3583,8 +3415,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3597,8 +3427,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -3622,7 +3450,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -3638,7 +3465,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3651,8 +3477,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3666,8 +3490,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -3684,7 +3506,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3696,7 +3517,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -3710,7 +3530,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -3724,7 +3543,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3737,7 +3555,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3749,8 +3566,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3763,8 +3578,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -3788,7 +3601,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -3804,7 +3616,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3817,8 +3628,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3832,8 +3641,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -3850,7 +3657,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3862,7 +3668,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -3876,7 +3681,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -3890,7 +3694,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3903,7 +3706,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -3915,8 +3717,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3929,8 +3729,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -3954,7 +3752,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -3970,7 +3767,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -3983,8 +3779,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3998,8 +3792,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -4016,7 +3808,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4028,7 +3819,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -4042,7 +3832,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -4056,7 +3845,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4069,7 +3857,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4081,8 +3868,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4095,8 +3880,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -4436,7 +4219,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -4453,7 +4235,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4469,8 +4250,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4485,8 +4264,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -4504,7 +4281,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -4517,7 +4293,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -4532,7 +4307,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -4547,7 +4321,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4561,7 +4334,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4574,8 +4346,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4591,8 +4361,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -4621,7 +4389,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -4638,7 +4405,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -4654,8 +4420,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4670,8 +4434,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -4689,7 +4451,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -4702,7 +4463,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -4717,7 +4477,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -4732,7 +4491,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4746,7 +4504,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -4759,8 +4516,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4776,8 +4531,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5138,7 +4891,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -5155,7 +4907,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5171,8 +4922,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5187,8 +4936,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5206,7 +4953,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5219,7 +4965,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -5234,7 +4979,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -5249,7 +4993,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5263,7 +5006,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5276,8 +5018,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5293,8 +5033,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5323,7 +5061,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -5340,7 +5077,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5356,8 +5092,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5372,8 +5106,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5391,7 +5123,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5404,7 +5135,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -5419,7 +5149,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -5434,7 +5163,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5448,7 +5176,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5461,8 +5188,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5478,8 +5203,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5508,7 +5231,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -5525,7 +5247,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5541,8 +5262,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5557,8 +5276,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5576,7 +5293,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5589,7 +5305,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -5604,7 +5319,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -5619,7 +5333,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5633,7 +5346,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5646,8 +5358,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5663,8 +5373,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5693,7 +5401,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -5710,7 +5417,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5726,8 +5432,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5742,8 +5446,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5761,7 +5463,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5774,7 +5475,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -5789,7 +5489,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -5804,7 +5503,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5818,7 +5516,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -5831,8 +5528,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5848,8 +5543,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -5878,7 +5571,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -5895,7 +5587,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -5911,8 +5602,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5927,8 +5616,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -5946,7 +5633,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5959,7 +5645,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -5974,7 +5659,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -5989,7 +5673,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6003,7 +5686,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6016,8 +5698,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6033,8 +5713,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6063,7 +5741,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -6080,7 +5757,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6096,8 +5772,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6112,8 +5786,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6131,7 +5803,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6144,7 +5815,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -6159,7 +5829,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -6174,7 +5843,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6188,7 +5856,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6201,8 +5868,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6218,8 +5883,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6248,7 +5911,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -6265,7 +5927,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6281,8 +5942,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6297,8 +5956,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6316,7 +5973,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6329,7 +5985,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -6344,7 +5999,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -6359,7 +6013,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6373,7 +6026,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6386,8 +6038,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6403,8 +6053,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -6433,7 +6081,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -6450,7 +6097,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -6466,8 +6112,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6482,8 +6126,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -6501,7 +6143,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6514,7 +6155,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -6529,7 +6169,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -6544,7 +6183,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6558,7 +6196,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -6571,8 +6208,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6588,8 +6223,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -7034,7 +6667,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -7049,7 +6681,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -7062,8 +6693,7 @@ ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7075,8 +6705,7 @@ ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -7092,7 +6721,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s2 @@ -7104,7 +6732,7 @@ ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -7116,7 +6744,7 @@ ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -7128,7 +6756,7 @@ ; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -7139,7 +6767,7 @@ ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -7150,8 +6778,7 @@ ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7165,8 +6792,7 @@ ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -7441,7 +7067,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -7453,7 +7078,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -7465,8 +7089,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -7478,8 +7100,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -7491,7 +7111,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -7503,7 +7122,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -7515,7 +7133,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -7527,7 +7144,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -7539,7 +7155,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -7550,8 +7165,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -7564,8 +7177,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -7585,7 +7196,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -7597,7 +7207,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -7609,8 +7218,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -7622,8 +7229,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -7635,7 +7240,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -7647,7 +7251,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -7659,7 +7262,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -7671,7 +7273,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -7683,7 +7284,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -7694,8 +7294,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -7708,8 +7306,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8002,7 +7598,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -8014,7 +7609,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -8026,8 +7620,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8039,8 +7631,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -8052,7 +7642,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -8064,7 +7653,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8076,7 +7664,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -8088,7 +7675,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -8100,7 +7686,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8111,8 +7696,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8125,8 +7708,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8146,7 +7727,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -8160,7 +7740,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8174,8 +7753,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8190,8 +7767,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8206,7 +7781,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -8219,7 +7793,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -8234,7 +7807,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -8249,7 +7821,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8263,7 +7834,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8276,8 +7846,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8291,8 +7859,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -8313,7 +7879,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -8327,7 +7892,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8341,8 +7905,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8357,8 +7919,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8373,7 +7933,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -8386,7 +7945,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -8401,7 +7959,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -8416,7 +7973,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8430,7 +7986,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8443,8 +7998,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8458,8 +8011,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -8644,7 +8195,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -8659,7 +8209,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8674,8 +8223,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8691,8 +8238,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8708,7 +8253,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -8722,7 +8266,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -8738,7 +8281,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -8754,7 +8296,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8769,7 +8310,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8783,8 +8323,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8801,8 +8339,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -8827,7 +8363,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -8842,7 +8377,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -8857,8 +8391,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8874,8 +8406,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -8891,7 +8421,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -8905,7 +8434,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -8921,7 +8449,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -8937,7 +8464,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8952,7 +8478,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -8966,8 +8491,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8984,8 +8507,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9283,7 +8804,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -9297,7 +8817,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -9308,8 +8827,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_endpgm ; @@ -9320,8 +8837,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -9335,7 +8850,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -9346,7 +8860,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -9357,7 +8870,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -9368,7 +8880,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -9379,7 +8890,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -9389,8 +8899,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -9402,8 +8910,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -9426,7 +8932,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -9442,7 +8947,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9455,8 +8959,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9470,8 +8972,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9488,7 +8988,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -9500,7 +8999,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -9514,7 +9012,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -9528,7 +9025,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9541,7 +9037,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9553,8 +9048,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9567,8 +9060,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -9592,7 +9083,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -9608,7 +9098,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -9621,8 +9110,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9636,8 +9123,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -9654,7 +9139,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -9666,7 +9150,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -9680,7 +9163,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -9694,7 +9176,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9707,7 +9188,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -9719,8 +9199,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9733,8 +9211,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10052,7 +9528,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10068,7 +9543,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10081,8 +9555,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10096,8 +9568,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10114,7 +9584,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10126,7 +9595,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10140,7 +9608,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10154,7 +9621,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10167,7 +9633,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10179,8 +9644,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10193,8 +9656,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10218,7 +9679,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10234,7 +9694,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10247,8 +9706,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10262,8 +9719,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10280,7 +9735,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10292,7 +9746,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10306,7 +9759,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10320,7 +9772,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10333,7 +9784,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10345,8 +9795,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10359,8 +9807,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10384,7 +9830,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10400,7 +9845,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10413,8 +9857,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10428,8 +9870,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10446,7 +9886,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10458,7 +9897,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10472,7 +9910,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10486,7 +9923,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10499,7 +9935,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10511,8 +9946,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10525,8 +9958,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10550,7 +9981,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10566,7 +9996,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10579,8 +10008,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10594,8 +10021,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10612,7 +10037,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10624,7 +10048,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10638,7 +10061,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10652,7 +10074,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10665,7 +10086,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10677,8 +10097,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10691,8 +10109,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10716,7 +10132,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10732,7 +10147,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10745,8 +10159,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10760,8 +10172,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10778,7 +10188,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10790,7 +10199,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10804,7 +10212,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10818,7 +10225,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10831,7 +10237,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10843,8 +10248,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10857,8 +10260,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -10882,7 +10283,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -10898,7 +10298,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -10911,8 +10310,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10926,8 +10323,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -10944,7 +10339,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -10956,7 +10350,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -10970,7 +10363,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -10984,7 +10376,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -10997,7 +10388,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11009,8 +10399,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11023,8 +10411,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11048,7 +10434,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -11064,7 +10449,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11077,8 +10461,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11092,8 +10474,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11110,7 +10490,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11122,7 +10501,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -11136,7 +10514,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -11150,7 +10527,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11163,7 +10539,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11175,8 +10550,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11189,8 +10562,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11214,7 +10585,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -11230,7 +10600,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11243,8 +10612,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11258,8 +10625,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11276,7 +10641,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -11288,7 +10652,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -11302,7 +10665,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -11316,7 +10678,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11329,7 +10690,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11341,8 +10701,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11355,8 +10713,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: buffer_gl0_inv @@ -11696,7 +11052,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -11712,7 +11067,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -11727,8 +11081,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] @@ -11741,8 +11093,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -11758,7 +11108,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -11771,7 +11120,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -11784,7 +11132,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -11797,7 +11144,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -11810,7 +11156,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -11822,8 +11167,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] @@ -11837,8 +11180,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -11865,7 +11206,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -11882,7 +11222,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -11898,8 +11237,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11914,8 +11251,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -11933,7 +11268,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -11946,7 +11280,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -11961,7 +11294,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -11976,7 +11308,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -11990,7 +11321,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12003,8 +11333,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12020,8 +11348,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12050,7 +11376,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -12067,7 +11392,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12083,8 +11407,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12099,8 +11421,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12118,7 +11438,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -12131,7 +11450,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -12146,7 +11464,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -12161,7 +11478,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12175,7 +11491,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12188,8 +11503,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12205,8 +11518,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12567,7 +11878,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -12584,7 +11894,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12600,8 +11909,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12616,8 +11923,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12635,7 +11940,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -12648,7 +11952,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -12663,7 +11966,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -12678,7 +11980,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12692,7 +11993,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12705,8 +12005,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12722,8 +12020,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12752,7 +12048,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -12769,7 +12064,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12785,8 +12079,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12801,8 +12093,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -12820,7 +12110,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -12833,7 +12122,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -12848,7 +12136,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -12863,7 +12150,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12877,7 +12163,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -12890,8 +12175,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12907,8 +12190,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -12937,7 +12218,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -12954,7 +12234,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -12970,8 +12249,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12986,8 +12263,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13005,7 +12280,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13018,7 +12292,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13033,7 +12306,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13048,7 +12320,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13062,7 +12333,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13075,8 +12345,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13092,8 +12360,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13122,7 +12388,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13139,7 +12404,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13155,8 +12419,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13171,8 +12433,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13190,7 +12450,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13203,7 +12462,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13218,7 +12476,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13233,7 +12490,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13247,7 +12503,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13260,8 +12515,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13277,8 +12530,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13307,7 +12558,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13324,7 +12574,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13340,8 +12589,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13356,8 +12603,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13375,7 +12620,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13388,7 +12632,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13403,7 +12646,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13418,7 +12660,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13432,7 +12673,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13445,8 +12685,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13462,8 +12700,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13492,7 +12728,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13509,7 +12744,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13525,8 +12759,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13541,8 +12773,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13560,7 +12790,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13573,7 +12802,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13588,7 +12816,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13603,7 +12830,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13617,7 +12843,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13630,8 +12855,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13647,8 +12870,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13677,7 +12898,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13694,7 +12914,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13710,8 +12929,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13726,8 +12943,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13745,7 +12960,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13758,7 +12972,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13773,7 +12986,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13788,7 +13000,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13802,7 +13013,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13815,8 +13025,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13832,8 +13040,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv @@ -13862,7 +13068,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -13879,7 +13084,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1_vol @@ -13895,8 +13099,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13911,8 +13113,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt vmcnt(0) -; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: buffer_gl0_inv @@ -13930,7 +13130,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -13943,7 +13142,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 @@ -13958,7 +13156,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: buffer_wbl2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_invl2 @@ -13973,7 +13170,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -13987,7 +13183,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 @@ -14000,8 +13195,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -14017,8 +13210,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt vmcnt(0) -; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll @@ -516,7 +516,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -528,7 +527,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -540,8 +538,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -553,7 +549,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -565,7 +560,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -576,8 +570,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -590,7 +582,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll @@ -426,7 +426,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_mov_b32 s4, s2 ; GFX6-NEXT: s_mov_b32 s5, s3 @@ -440,7 +439,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_load_dword v2, v[0:1] ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -452,8 +450,7 @@ ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -478,7 +475,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s3 @@ -500,7 +496,7 @@ ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -521,7 +517,7 @@ ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -532,8 +528,7 @@ ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -819,7 +814,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -831,7 +825,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -843,8 +836,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -856,7 +847,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -868,7 +858,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -879,7 +868,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -890,7 +878,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -901,7 +888,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -912,7 +898,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -923,8 +908,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -937,7 +920,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -957,7 +939,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -969,7 +950,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -981,8 +961,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -994,7 +972,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1006,7 +983,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1017,7 +993,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1028,7 +1003,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1039,7 +1013,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1050,7 +1023,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1061,8 +1033,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1075,7 +1045,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1351,7 +1320,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -1363,7 +1331,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1375,8 +1342,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -1388,7 +1353,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1400,7 +1364,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1411,7 +1374,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1422,7 +1384,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1433,7 +1394,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1444,7 +1404,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1455,8 +1414,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1469,7 +1426,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1489,7 +1445,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -1501,7 +1456,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1513,8 +1467,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1528,7 +1480,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1540,7 +1491,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1551,7 +1501,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1562,7 +1511,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1575,7 +1523,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1586,7 +1533,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -1599,8 +1545,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1613,7 +1557,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1633,7 +1576,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -1645,7 +1587,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v[0:1], v2 ; GFX7-NEXT: s_endpgm ; @@ -1657,8 +1598,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1672,7 +1611,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-CU-NEXT: s_endpgm ; @@ -1684,7 +1622,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1695,7 +1632,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1706,7 +1642,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -1719,7 +1654,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1730,7 +1664,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -1743,8 +1676,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1757,7 +1688,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -1929,7 +1859,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -1943,7 +1872,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 @@ -1957,8 +1885,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1973,7 +1899,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] @@ -1987,7 +1912,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2000,7 +1924,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] @@ -2013,7 +1936,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2027,7 +1949,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 @@ -2040,7 +1961,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -2054,8 +1974,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2071,7 +1989,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -2094,7 +2011,6 @@ ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2108,7 +2024,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s2 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: flat_store_dword v[0:1], v2 @@ -2122,8 +2037,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2138,7 +2051,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[0:1] @@ -2152,7 +2064,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s4 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2165,7 +2076,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] @@ -2178,7 +2088,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2192,7 +2101,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 @@ -2205,7 +2113,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -2219,8 +2126,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2236,7 +2141,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -2515,7 +2419,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -2529,7 +2432,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -2540,8 +2442,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_endpgm ; @@ -2552,7 +2452,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -2566,7 +2465,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2576,7 +2474,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2586,7 +2483,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -2596,7 +2492,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2606,7 +2501,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -2616,8 +2510,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2629,7 +2521,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2652,7 +2543,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -2666,7 +2556,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -2677,8 +2566,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2691,7 +2578,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -2705,7 +2591,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2715,7 +2600,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2725,7 +2609,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2737,7 +2620,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2747,7 +2629,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -2759,8 +2640,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2772,7 +2651,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -2795,7 +2673,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -2809,7 +2686,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -2820,8 +2696,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2834,7 +2708,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -2848,7 +2721,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2858,7 +2730,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2868,7 +2739,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -2880,7 +2750,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2890,7 +2759,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -2902,8 +2770,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2915,7 +2781,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -3198,7 +3063,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -3212,7 +3076,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -3223,8 +3086,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3237,7 +3098,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3251,7 +3111,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -3261,7 +3120,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3271,7 +3129,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3283,7 +3140,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3293,7 +3149,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -3305,8 +3160,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3318,7 +3171,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -3341,7 +3193,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -3355,7 +3206,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -3366,8 +3216,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3380,7 +3228,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3394,7 +3241,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -3404,7 +3250,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3414,7 +3259,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3426,7 +3270,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3436,7 +3279,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -3448,8 +3290,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3461,7 +3301,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -3484,7 +3323,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -3498,7 +3336,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -3509,8 +3346,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3523,7 +3358,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3537,7 +3371,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -3547,7 +3380,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3557,7 +3389,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3569,7 +3400,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3579,7 +3409,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -3591,8 +3420,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3604,7 +3431,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -3627,7 +3453,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -3641,7 +3466,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -3652,8 +3476,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3666,7 +3488,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3680,7 +3501,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -3690,7 +3510,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3700,7 +3519,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3712,7 +3530,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3722,7 +3539,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -3734,8 +3550,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3747,7 +3561,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -3770,7 +3583,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -3784,7 +3596,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -3795,8 +3606,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3809,7 +3618,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3823,7 +3631,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -3833,7 +3640,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3843,7 +3649,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3855,7 +3660,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3865,7 +3669,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -3877,8 +3680,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3890,7 +3691,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -3913,7 +3713,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -3927,7 +3726,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -3938,8 +3736,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3952,7 +3748,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -3966,7 +3761,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -3976,7 +3770,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -3986,7 +3779,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -3998,7 +3790,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4008,7 +3799,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -4020,8 +3810,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4033,7 +3821,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4056,7 +3843,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -4070,7 +3856,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -4081,8 +3866,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4095,7 +3878,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -4109,7 +3891,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -4119,7 +3900,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4129,7 +3909,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4141,7 +3920,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4151,7 +3929,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -4163,8 +3940,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4176,7 +3951,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4199,7 +3973,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -4213,7 +3986,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] ; GFX7-NEXT: s_endpgm ; @@ -4224,8 +3996,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4238,7 +4008,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -4252,7 +4021,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -4262,7 +4030,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4272,7 +4039,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4284,7 +4050,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4294,7 +4059,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -4306,8 +4070,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4319,7 +4081,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-CU-NEXT: s_nop 0 ; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -4646,7 +4407,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -4662,7 +4422,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -4677,8 +4436,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] @@ -4691,7 +4448,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -4707,7 +4463,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -4719,7 +4474,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -4731,7 +4485,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -4743,7 +4496,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -4755,7 +4507,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -4767,8 +4518,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] @@ -4782,7 +4531,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -4809,7 +4557,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -4825,7 +4572,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -4840,8 +4586,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4855,7 +4599,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -4871,7 +4614,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -4883,7 +4625,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -4895,7 +4636,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -4908,7 +4648,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -4920,7 +4659,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -4933,8 +4671,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4949,7 +4685,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -4976,7 +4711,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -4992,7 +4726,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -5007,8 +4740,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5022,7 +4753,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -5038,7 +4768,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5050,7 +4779,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -5062,7 +4790,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5075,7 +4802,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -5087,7 +4813,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5100,8 +4825,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5116,7 +4839,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -5451,7 +5173,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5467,7 +5188,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -5482,8 +5202,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5497,7 +5215,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -5513,7 +5230,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5525,7 +5241,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -5537,7 +5252,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5550,7 +5264,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -5562,7 +5275,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5575,8 +5287,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5591,7 +5301,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -5618,7 +5327,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5634,7 +5342,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -5649,8 +5356,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5664,7 +5369,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -5680,7 +5384,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5692,7 +5395,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -5704,7 +5406,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5717,7 +5418,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -5729,7 +5429,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5742,8 +5441,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5758,7 +5455,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -5785,7 +5481,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5801,7 +5496,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -5816,8 +5510,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5831,7 +5523,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -5847,7 +5538,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5859,7 +5549,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -5871,7 +5560,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -5884,7 +5572,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -5896,7 +5583,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -5909,8 +5595,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5925,7 +5609,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -5952,7 +5635,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -5968,7 +5650,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -5983,8 +5664,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5998,7 +5677,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -6014,7 +5692,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6026,7 +5703,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -6038,7 +5714,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6051,7 +5726,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -6063,7 +5737,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -6076,8 +5749,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6092,7 +5763,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -6119,7 +5789,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6135,7 +5804,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -6150,8 +5818,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6165,7 +5831,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -6181,7 +5846,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6193,7 +5857,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -6205,7 +5868,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6218,7 +5880,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -6230,7 +5891,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -6243,8 +5903,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6259,7 +5917,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -6286,7 +5943,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6302,7 +5958,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -6317,8 +5972,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6332,7 +5985,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -6348,7 +6000,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6360,7 +6011,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -6372,7 +6022,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6385,7 +6034,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -6397,7 +6045,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -6410,8 +6057,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6426,7 +6071,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -6453,7 +6097,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6469,7 +6112,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -6484,8 +6126,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6499,7 +6139,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -6515,7 +6154,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6527,7 +6165,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -6539,7 +6176,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6552,7 +6188,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -6564,7 +6199,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -6577,8 +6211,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6593,7 +6225,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -6620,7 +6251,6 @@ ; GFX6-NEXT: s_mov_b32 s4, s0 ; GFX6-NEXT: s_mov_b32 s5, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6636,7 +6266,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mov_b32_e32 v3, s3 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -6651,8 +6280,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6666,7 +6293,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: global_store_dword v2, v0, s[0:1] @@ -6682,7 +6308,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc ; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0) ; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -6694,7 +6319,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -6706,7 +6330,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -6719,7 +6342,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -6731,7 +6353,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -6744,8 +6365,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6760,7 +6379,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v2, v0, s[0:1] @@ -7215,8 +6833,7 @@ ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -7262,7 +6879,7 @@ ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -7283,7 +6900,7 @@ ; GFX940-TGSPLIT: ; %bb.0: ; %entry ; GFX940-TGSPLIT-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -7294,8 +6911,7 @@ ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -7603,8 +7219,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -7647,7 +7261,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -7668,7 +7281,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -7679,8 +7291,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -7734,8 +7344,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -7778,7 +7386,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -7799,7 +7406,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -7810,8 +7416,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8121,8 +7725,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_endpgm ; @@ -8165,7 +7767,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -8186,7 +7787,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -8197,8 +7797,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -8252,8 +7850,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8298,7 +7894,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8321,7 +7916,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -8334,8 +7928,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8389,8 +7981,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8435,7 +8025,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8458,7 +8047,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[2:3] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -8471,8 +8059,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8682,8 +8268,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8735,7 +8319,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8761,7 +8344,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -8775,8 +8357,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -8840,8 +8420,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -8893,7 +8471,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -8919,7 +8496,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -8933,8 +8509,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9251,8 +8825,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_endpgm ; @@ -9294,7 +8866,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -9313,7 +8884,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -9323,8 +8893,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_nop 0 ; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) @@ -9381,8 +8949,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9426,7 +8992,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9447,7 +9012,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9459,8 +9023,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9517,8 +9079,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9562,7 +9122,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9583,7 +9142,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9595,8 +9153,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -9913,8 +9469,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -9958,7 +9512,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -9979,7 +9532,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -9991,8 +9543,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10049,8 +9599,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10094,7 +9642,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10115,7 +9662,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -10127,8 +9673,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10185,8 +9729,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10230,7 +9772,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10251,7 +9792,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -10263,8 +9803,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10321,8 +9859,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10366,7 +9902,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10387,7 +9922,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -10399,8 +9933,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10457,8 +9989,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10502,7 +10032,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10523,7 +10052,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -10535,8 +10063,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10593,8 +10119,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10638,7 +10162,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10659,7 +10182,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -10671,8 +10193,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10729,8 +10249,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10774,7 +10292,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10795,7 +10312,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -10807,8 +10323,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -10865,8 +10379,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -10910,7 +10422,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -10931,7 +10442,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -10943,8 +10453,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], s[0:1] offset:16 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11311,8 +10819,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: global_store_dword v2, v0, s[0:1] @@ -11362,7 +10868,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] @@ -11385,7 +10890,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 @@ -11397,8 +10901,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: global_store_b32 v2, v0, s[0:1] @@ -11467,8 +10969,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11519,7 +11019,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11543,7 +11042,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -11556,8 +11054,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -11627,8 +11123,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -11679,7 +11173,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -11703,7 +11196,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -11716,8 +11208,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12095,8 +11585,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12147,7 +11635,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12171,7 +11658,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -12184,8 +11670,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12255,8 +11739,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12307,7 +11789,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12331,7 +11812,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -12344,8 +11824,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12415,8 +11893,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12467,7 +11943,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12491,7 +11966,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -12504,8 +11978,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12575,8 +12047,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12627,7 +12097,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12651,7 +12120,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -12664,8 +12132,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12735,8 +12201,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12787,7 +12251,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12811,7 +12274,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -12824,8 +12286,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -12895,8 +12355,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -12947,7 +12405,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -12971,7 +12428,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -12984,8 +12440,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13055,8 +12509,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13107,7 +12559,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13131,7 +12582,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -13144,8 +12594,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -13215,8 +12663,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -13267,7 +12713,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol @@ -13291,7 +12736,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 @@ -13304,8 +12748,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: global_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll @@ -409,7 +409,6 @@ ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v1, s1 @@ -422,7 +421,6 @@ ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_read_b32 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -435,8 +433,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_read_b32 v0, v0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -449,7 +445,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_read_b32 v0, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v1, v0 @@ -461,7 +456,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -473,7 +467,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -485,7 +478,6 @@ ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -498,7 +490,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -510,7 +501,6 @@ ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -523,8 +513,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_load_b32 v0, v0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -536,7 +524,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_load_b32 v0, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 @@ -768,7 +755,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -779,7 +765,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -789,8 +774,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -800,7 +783,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -811,7 +793,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -821,7 +802,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -831,7 +811,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -841,7 +820,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -851,7 +829,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -860,8 +837,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -870,7 +845,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr addrspace(3) %out) { @@ -887,7 +861,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -898,7 +871,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -908,8 +880,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -919,7 +889,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -930,7 +899,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -940,7 +908,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -950,7 +917,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -960,7 +926,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -970,7 +935,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -979,8 +943,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -989,7 +951,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr addrspace(3) %out) { @@ -1231,7 +1192,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -1242,7 +1202,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -1252,8 +1211,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1263,7 +1220,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1274,7 +1230,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1284,7 +1239,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1294,7 +1248,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1304,7 +1257,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1314,7 +1266,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1323,8 +1274,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1333,7 +1282,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm ptr addrspace(3) %out, i32 %in) { @@ -1350,7 +1298,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -1362,7 +1309,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -1373,8 +1319,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1386,7 +1330,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -1398,7 +1341,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1409,7 +1351,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1420,7 +1361,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1431,7 +1371,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -1442,7 +1381,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -1452,8 +1390,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1464,7 +1400,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -1482,7 +1417,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -1494,7 +1428,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -1505,8 +1438,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1518,7 +1449,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -1530,7 +1460,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1541,7 +1470,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1552,7 +1480,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1563,7 +1490,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -1574,7 +1500,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -1584,8 +1509,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1596,7 +1519,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -1747,7 +1669,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -1760,7 +1681,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -1772,8 +1692,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1786,7 +1704,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -1799,7 +1716,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -1811,7 +1727,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1823,7 +1738,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -1836,7 +1750,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1848,7 +1761,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -1860,8 +1772,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1873,7 +1783,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -1893,7 +1802,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -1906,7 +1814,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -1918,8 +1825,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1932,7 +1837,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -1945,7 +1849,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -1957,7 +1860,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1969,7 +1871,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -1982,7 +1883,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1994,7 +1894,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -2006,8 +1905,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2019,7 +1916,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -2292,7 +2188,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -2304,7 +2199,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_endpgm ; @@ -2315,8 +2209,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_endpgm ; @@ -2327,7 +2219,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -2339,7 +2230,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2350,7 +2240,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2361,7 +2250,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -2372,7 +2260,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2383,7 +2270,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -2393,8 +2279,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_endpgm ; @@ -2404,7 +2288,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm ptr addrspace(3) %out, i32 %in, i32 %old) { @@ -2424,7 +2307,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -2437,7 +2319,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -2449,8 +2330,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2463,7 +2342,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2476,7 +2354,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2488,7 +2365,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2500,7 +2376,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2512,7 +2387,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -2524,7 +2398,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -2535,8 +2408,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2548,7 +2419,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2569,7 +2439,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -2582,7 +2451,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -2594,8 +2462,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2608,7 +2474,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2621,7 +2486,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2633,7 +2497,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2645,7 +2508,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2657,7 +2519,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -2669,7 +2530,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -2680,8 +2540,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2693,7 +2551,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2978,7 +2835,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -2991,7 +2847,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3003,8 +2858,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3017,7 +2870,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3030,7 +2882,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3042,7 +2893,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3054,7 +2904,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3066,7 +2915,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3078,7 +2926,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3089,8 +2936,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3102,7 +2947,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3123,7 +2967,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3136,7 +2979,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3148,8 +2990,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3162,7 +3002,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3175,7 +3014,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3187,7 +3025,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3199,7 +3036,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3211,7 +3047,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3223,7 +3058,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3234,8 +3068,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3247,7 +3079,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3268,7 +3099,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3281,7 +3111,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3293,8 +3122,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3307,7 +3134,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3320,7 +3146,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3332,7 +3157,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3344,7 +3168,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3356,7 +3179,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3368,7 +3190,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3379,8 +3200,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3392,7 +3211,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3413,7 +3231,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3426,7 +3243,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3438,8 +3254,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3452,7 +3266,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3465,7 +3278,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3477,7 +3289,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3489,7 +3300,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3501,7 +3311,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3513,7 +3322,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3524,8 +3332,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3537,7 +3343,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3558,7 +3363,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3571,7 +3375,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3583,8 +3386,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3597,7 +3398,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3610,7 +3410,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3622,7 +3421,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3634,7 +3432,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3646,7 +3443,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3658,7 +3454,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3669,8 +3464,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3682,7 +3475,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3703,7 +3495,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3716,7 +3507,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3728,8 +3518,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3742,7 +3530,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3755,7 +3542,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3767,7 +3553,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3779,7 +3564,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3791,7 +3575,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3803,7 +3586,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3814,8 +3596,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3827,7 +3607,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3848,7 +3627,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3861,7 +3639,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3873,8 +3650,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3887,7 +3662,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3900,7 +3674,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3912,7 +3685,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3924,7 +3696,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3936,7 +3707,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3948,7 +3718,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3959,8 +3728,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3972,7 +3739,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3993,7 +3759,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -4006,7 +3771,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -4018,8 +3782,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4032,7 +3794,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4045,7 +3806,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4057,7 +3817,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -4069,7 +3828,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4081,7 +3839,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -4093,7 +3850,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -4104,8 +3860,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4117,7 +3871,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4428,7 +4181,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -4442,7 +4194,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -4455,8 +4206,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 @@ -4469,7 +4218,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -4483,7 +4231,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4496,7 +4243,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4509,7 +4255,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4522,7 +4267,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4535,7 +4279,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4547,8 +4290,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 @@ -4560,7 +4301,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -4584,7 +4324,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -4598,7 +4337,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -4611,8 +4349,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4626,7 +4362,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -4640,7 +4375,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4653,7 +4387,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4666,7 +4399,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4680,7 +4412,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4693,7 +4424,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4706,8 +4436,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4720,7 +4448,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -4744,7 +4471,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -4758,7 +4484,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -4771,8 +4496,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4786,7 +4509,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -4800,7 +4522,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4813,7 +4534,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4826,7 +4546,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4840,7 +4559,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4853,7 +4571,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4866,8 +4583,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4880,7 +4595,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5198,7 +4912,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5212,7 +4925,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5225,8 +4937,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5240,7 +4950,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5254,7 +4963,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5267,7 +4975,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5280,7 +4987,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5294,7 +5000,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5307,7 +5012,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5320,8 +5024,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5334,7 +5036,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5358,7 +5059,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5372,7 +5072,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5385,8 +5084,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5400,7 +5097,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5414,7 +5110,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5427,7 +5122,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5440,7 +5134,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5454,7 +5147,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5467,7 +5159,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5480,8 +5171,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5494,7 +5183,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5518,7 +5206,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5532,7 +5219,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5545,8 +5231,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5560,7 +5244,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5574,7 +5257,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5587,7 +5269,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5600,7 +5281,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5614,7 +5294,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5627,7 +5306,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5640,8 +5318,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5654,7 +5330,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5678,7 +5353,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5692,7 +5366,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5705,8 +5378,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5720,7 +5391,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5734,7 +5404,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5747,7 +5416,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5760,7 +5428,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5774,7 +5441,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5787,7 +5453,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5800,8 +5465,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5814,7 +5477,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5838,7 +5500,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5852,7 +5513,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5865,8 +5525,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5880,7 +5538,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5894,7 +5551,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5907,7 +5563,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5920,7 +5575,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5934,7 +5588,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5947,7 +5600,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5960,8 +5612,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5974,7 +5624,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5998,7 +5647,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -6012,7 +5660,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -6025,8 +5672,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6040,7 +5685,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -6054,7 +5698,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6067,7 +5710,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6080,7 +5722,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6094,7 +5735,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6107,7 +5747,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6120,8 +5759,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6134,7 +5771,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -6158,7 +5794,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -6172,7 +5807,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -6185,8 +5819,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6200,7 +5832,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -6214,7 +5845,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6227,7 +5857,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6240,7 +5869,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6254,7 +5882,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6267,7 +5894,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6280,8 +5906,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6294,7 +5918,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -6318,7 +5941,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -6332,7 +5954,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -6345,8 +5966,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6360,7 +5979,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -6374,7 +5992,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6387,7 +6004,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6400,7 +6016,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6414,7 +6029,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6427,7 +6041,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6440,8 +6053,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6454,7 +6065,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll @@ -409,7 +409,6 @@ ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v1, s1 @@ -422,7 +421,6 @@ ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_read_b32 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -435,8 +433,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_read_b32 v0, v0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -449,7 +445,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_read_b32 v0, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v1, v0 @@ -461,7 +456,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -473,7 +467,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -485,7 +478,6 @@ ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -498,7 +490,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -510,7 +501,6 @@ ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -523,8 +513,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_load_b32 v0, v0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -536,7 +524,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_load_b32 v0, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 @@ -768,7 +755,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -779,7 +765,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -789,8 +774,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -800,7 +783,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -811,7 +793,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -821,7 +802,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -831,7 +811,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -841,7 +820,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -851,7 +829,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -860,8 +837,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -870,7 +845,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr addrspace(3) %out) { @@ -887,7 +861,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -898,7 +871,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -908,8 +880,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -919,7 +889,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -930,7 +899,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -940,7 +908,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -950,7 +917,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -960,7 +926,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -970,7 +935,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -979,8 +943,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -989,7 +951,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr addrspace(3) %out) { @@ -1231,7 +1192,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -1242,7 +1202,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -1252,8 +1211,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1263,7 +1220,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1274,7 +1230,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1284,7 +1239,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1294,7 +1248,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1304,7 +1257,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1314,7 +1266,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1323,8 +1274,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1333,7 +1282,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm ptr addrspace(3) %out, i32 %in) { @@ -1350,7 +1298,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -1362,7 +1309,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -1373,8 +1319,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1386,7 +1330,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -1398,7 +1341,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1409,7 +1351,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1420,7 +1361,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1431,7 +1371,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -1442,7 +1381,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -1452,8 +1390,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1464,7 +1400,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -1482,7 +1417,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -1494,7 +1428,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -1505,8 +1438,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1518,7 +1449,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -1530,7 +1460,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1541,7 +1470,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1552,7 +1480,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1563,7 +1490,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -1574,7 +1500,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -1584,8 +1509,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1596,7 +1519,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -1747,7 +1669,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -1760,7 +1681,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -1772,8 +1692,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1786,7 +1704,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -1799,7 +1716,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -1811,7 +1727,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1823,7 +1738,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -1836,7 +1750,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1848,7 +1761,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -1860,8 +1772,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1873,7 +1783,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -1893,7 +1802,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -1906,7 +1814,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -1918,8 +1825,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1932,7 +1837,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -1945,7 +1849,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -1957,7 +1860,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1969,7 +1871,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -1982,7 +1883,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1994,7 +1894,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -2006,8 +1905,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2019,7 +1916,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -2292,7 +2188,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -2304,7 +2199,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_endpgm ; @@ -2315,8 +2209,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_endpgm ; @@ -2327,7 +2219,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -2339,7 +2230,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2350,7 +2240,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2361,7 +2250,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -2372,7 +2260,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2383,7 +2270,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -2393,8 +2279,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_endpgm ; @@ -2404,7 +2288,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm ptr addrspace(3) %out, i32 %in, i32 %old) { @@ -2424,7 +2307,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -2437,7 +2319,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -2449,8 +2330,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2463,7 +2342,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2476,7 +2354,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2488,7 +2365,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2500,7 +2376,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2512,7 +2387,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -2524,7 +2398,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -2535,8 +2408,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2548,7 +2419,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2569,7 +2439,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -2582,7 +2451,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -2594,8 +2462,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2608,7 +2474,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2621,7 +2486,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2633,7 +2497,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2645,7 +2508,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2657,7 +2519,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -2669,7 +2530,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -2680,8 +2540,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2693,7 +2551,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2978,7 +2835,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -2991,7 +2847,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3003,8 +2858,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3017,7 +2870,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3030,7 +2882,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3042,7 +2893,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3054,7 +2904,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3066,7 +2915,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3078,7 +2926,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3089,8 +2936,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3102,7 +2947,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3123,7 +2967,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3136,7 +2979,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3148,8 +2990,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3162,7 +3002,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3175,7 +3014,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3187,7 +3025,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3199,7 +3036,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3211,7 +3047,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3223,7 +3058,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3234,8 +3068,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3247,7 +3079,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3268,7 +3099,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3281,7 +3111,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3293,8 +3122,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3307,7 +3134,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3320,7 +3146,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3332,7 +3157,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3344,7 +3168,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3356,7 +3179,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3368,7 +3190,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3379,8 +3200,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3392,7 +3211,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3413,7 +3231,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3426,7 +3243,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3438,8 +3254,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3452,7 +3266,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3465,7 +3278,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3477,7 +3289,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3489,7 +3300,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3501,7 +3311,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3513,7 +3322,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3524,8 +3332,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3537,7 +3343,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3558,7 +3363,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3571,7 +3375,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3583,8 +3386,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3597,7 +3398,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3610,7 +3410,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3622,7 +3421,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3634,7 +3432,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3646,7 +3443,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3658,7 +3454,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3669,8 +3464,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3682,7 +3475,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3703,7 +3495,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3716,7 +3507,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3728,8 +3518,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3742,7 +3530,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3755,7 +3542,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3767,7 +3553,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3779,7 +3564,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3791,7 +3575,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3803,7 +3586,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3814,8 +3596,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3827,7 +3607,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3848,7 +3627,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3861,7 +3639,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3873,8 +3650,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3887,7 +3662,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3900,7 +3674,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3912,7 +3685,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3924,7 +3696,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3936,7 +3707,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3948,7 +3718,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3959,8 +3728,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3972,7 +3739,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3993,7 +3759,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -4006,7 +3771,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -4018,8 +3782,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4032,7 +3794,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4045,7 +3806,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4057,7 +3817,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -4069,7 +3828,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4081,7 +3839,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -4093,7 +3850,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -4104,8 +3860,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4117,7 +3871,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4428,7 +4181,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -4442,7 +4194,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -4455,8 +4206,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 @@ -4469,7 +4218,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -4483,7 +4231,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4496,7 +4243,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4509,7 +4255,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4522,7 +4267,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4535,7 +4279,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4547,8 +4290,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 @@ -4560,7 +4301,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -4584,7 +4324,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -4598,7 +4337,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -4611,8 +4349,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4626,7 +4362,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -4640,7 +4375,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4653,7 +4387,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4666,7 +4399,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4680,7 +4412,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4693,7 +4424,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4706,8 +4436,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4720,7 +4448,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -4744,7 +4471,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -4758,7 +4484,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -4771,8 +4496,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4786,7 +4509,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -4800,7 +4522,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4813,7 +4534,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4826,7 +4546,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4840,7 +4559,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4853,7 +4571,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4866,8 +4583,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4880,7 +4595,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5198,7 +4912,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5212,7 +4925,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5225,8 +4937,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5240,7 +4950,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5254,7 +4963,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5267,7 +4975,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5280,7 +4987,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5294,7 +5000,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5307,7 +5012,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5320,8 +5024,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5334,7 +5036,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5358,7 +5059,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5372,7 +5072,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5385,8 +5084,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5400,7 +5097,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5414,7 +5110,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5427,7 +5122,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5440,7 +5134,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5454,7 +5147,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5467,7 +5159,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5480,8 +5171,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5494,7 +5183,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5518,7 +5206,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5532,7 +5219,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5545,8 +5231,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5560,7 +5244,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5574,7 +5257,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5587,7 +5269,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5600,7 +5281,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5614,7 +5294,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5627,7 +5306,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5640,8 +5318,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5654,7 +5330,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5678,7 +5353,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5692,7 +5366,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5705,8 +5378,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5720,7 +5391,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5734,7 +5404,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5747,7 +5416,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5760,7 +5428,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5774,7 +5441,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5787,7 +5453,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5800,8 +5465,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5814,7 +5477,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5838,7 +5500,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5852,7 +5513,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5865,8 +5525,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5880,7 +5538,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5894,7 +5551,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5907,7 +5563,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5920,7 +5575,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5934,7 +5588,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5947,7 +5600,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5960,8 +5612,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5974,7 +5624,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5998,7 +5647,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -6012,7 +5660,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -6025,8 +5672,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6040,7 +5685,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -6054,7 +5698,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6067,7 +5710,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6080,7 +5722,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6094,7 +5735,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6107,7 +5747,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6120,8 +5759,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6134,7 +5771,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -6158,7 +5794,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -6172,7 +5807,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -6185,8 +5819,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6200,7 +5832,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -6214,7 +5845,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6227,7 +5857,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6240,7 +5869,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6254,7 +5882,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6267,7 +5894,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6280,8 +5906,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6294,7 +5918,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -6318,7 +5941,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -6332,7 +5954,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -6345,8 +5966,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6360,7 +5979,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -6374,7 +5992,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6387,7 +6004,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6400,7 +6016,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6414,7 +6029,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6427,7 +6041,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6440,8 +6053,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6454,7 +6065,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll @@ -514,7 +514,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -525,7 +524,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -535,8 +533,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -546,7 +542,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -557,7 +552,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -566,8 +560,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -576,7 +568,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr addrspace(3) %out) { diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll @@ -409,7 +409,6 @@ ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v1, s1 @@ -422,7 +421,6 @@ ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_read_b32 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -435,8 +433,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_read_b32 v0, v0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -449,7 +445,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_read_b32 v0, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v1, v0 @@ -461,7 +456,6 @@ ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_read_b32 v0, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -473,7 +467,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -485,7 +478,6 @@ ; GFX90A-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -498,7 +490,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -510,7 +501,6 @@ ; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_read_b32 v0, v0 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 @@ -523,8 +513,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_load_b32 v0, v0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -536,7 +524,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_load_b32 v0, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v1, v0 @@ -768,7 +755,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -779,7 +765,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -789,8 +774,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -800,7 +783,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -811,7 +793,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -821,7 +802,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -831,7 +811,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -841,7 +820,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -851,7 +829,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -860,8 +837,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -870,7 +845,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr addrspace(3) %out) { @@ -887,7 +861,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -898,7 +871,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -908,8 +880,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -919,7 +889,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -930,7 +899,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -940,7 +908,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -950,7 +917,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -960,7 +926,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -970,7 +935,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -979,8 +943,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -989,7 +951,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr addrspace(3) %out) { @@ -1231,7 +1192,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -1242,7 +1202,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX7-NEXT: s_endpgm ; @@ -1252,8 +1211,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-WGP-NEXT: s_endpgm ; @@ -1263,7 +1220,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -1274,7 +1230,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1284,7 +1239,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1294,7 +1248,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -1304,7 +1257,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1314,7 +1266,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -1323,8 +1274,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-WGP-NEXT: s_endpgm ; @@ -1333,7 +1282,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_endpgm ptr addrspace(3) %out, i32 %in) { @@ -1350,7 +1298,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -1362,7 +1309,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -1373,8 +1319,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1386,7 +1330,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -1398,7 +1341,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1409,7 +1351,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1420,7 +1361,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1431,7 +1371,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -1442,7 +1381,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -1452,8 +1390,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1464,7 +1400,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -1482,7 +1417,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -1494,7 +1428,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -1505,8 +1438,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1518,7 +1449,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -1530,7 +1460,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1541,7 +1470,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1552,7 +1480,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1563,7 +1490,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -1574,7 +1500,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -1584,8 +1509,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1596,7 +1519,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -1747,7 +1669,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -1760,7 +1681,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -1772,8 +1692,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1786,7 +1704,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -1799,7 +1716,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -1811,7 +1727,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1823,7 +1738,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -1836,7 +1750,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1848,7 +1761,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -1860,8 +1772,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -1873,7 +1783,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -1893,7 +1802,6 @@ ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -1906,7 +1814,6 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -1918,8 +1825,6 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -1932,7 +1837,6 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -1945,7 +1849,6 @@ ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -1957,7 +1860,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1969,7 +1871,6 @@ ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -1982,7 +1883,6 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -1994,7 +1894,6 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -2006,8 +1905,6 @@ ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2019,7 +1916,6 @@ ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -2292,7 +2188,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_endpgm ; @@ -2304,7 +2199,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_endpgm ; @@ -2315,8 +2209,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_endpgm ; @@ -2327,7 +2219,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_endpgm ; @@ -2339,7 +2230,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2350,7 +2240,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2361,7 +2250,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; @@ -2372,7 +2260,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2383,7 +2270,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: s_endpgm ; @@ -2393,8 +2279,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_endpgm ; @@ -2404,7 +2288,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_endpgm ptr addrspace(3) %out, i32 %in, i32 %old) { @@ -2424,7 +2307,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -2437,7 +2319,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -2449,8 +2330,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2463,7 +2342,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2476,7 +2354,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2488,7 +2365,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2500,7 +2376,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2512,7 +2387,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -2524,7 +2398,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -2535,8 +2408,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2548,7 +2419,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2569,7 +2439,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -2582,7 +2451,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -2594,8 +2462,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -2608,7 +2474,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -2621,7 +2486,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2633,7 +2497,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2645,7 +2508,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2657,7 +2519,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -2669,7 +2530,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -2680,8 +2540,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -2693,7 +2551,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -2978,7 +2835,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -2991,7 +2847,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3003,8 +2858,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3017,7 +2870,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3030,7 +2882,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3042,7 +2893,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3054,7 +2904,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3066,7 +2915,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3078,7 +2926,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3089,8 +2936,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3102,7 +2947,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3123,7 +2967,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3136,7 +2979,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3148,8 +2990,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3162,7 +3002,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3175,7 +3014,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3187,7 +3025,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3199,7 +3036,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3211,7 +3047,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3223,7 +3058,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3234,8 +3068,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3247,7 +3079,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3268,7 +3099,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3281,7 +3111,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3293,8 +3122,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3307,7 +3134,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3320,7 +3146,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3332,7 +3157,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3344,7 +3168,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3356,7 +3179,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3368,7 +3190,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3379,8 +3200,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3392,7 +3211,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3413,7 +3231,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3426,7 +3243,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3438,8 +3254,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3452,7 +3266,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3465,7 +3278,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3477,7 +3289,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3489,7 +3300,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3501,7 +3311,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3513,7 +3322,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3524,8 +3332,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3537,7 +3343,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3558,7 +3363,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3571,7 +3375,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3583,8 +3386,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3597,7 +3398,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3610,7 +3410,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3622,7 +3421,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3634,7 +3432,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3646,7 +3443,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3658,7 +3454,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3669,8 +3464,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3682,7 +3475,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3703,7 +3495,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3716,7 +3507,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3728,8 +3518,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3742,7 +3530,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3755,7 +3542,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3767,7 +3553,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3779,7 +3564,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3791,7 +3575,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3803,7 +3586,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3814,8 +3596,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3827,7 +3607,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3848,7 +3627,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -3861,7 +3639,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -3873,8 +3650,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -3887,7 +3662,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -3900,7 +3674,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -3912,7 +3685,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3924,7 +3696,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3936,7 +3707,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -3948,7 +3718,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -3959,8 +3728,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -3972,7 +3739,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -3993,7 +3759,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -4006,7 +3771,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_endpgm @@ -4018,8 +3782,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4032,7 +3794,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: s_endpgm @@ -4045,7 +3806,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -4057,7 +3817,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -4069,7 +3828,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4081,7 +3839,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: s_endpgm @@ -4093,7 +3850,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_endpgm @@ -4104,8 +3860,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4117,7 +3871,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: s_endpgm @@ -4428,7 +4181,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -4442,7 +4194,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -4455,8 +4206,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 @@ -4469,7 +4218,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -4483,7 +4231,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4496,7 +4243,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4509,7 +4255,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4522,7 +4267,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4535,7 +4279,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4547,8 +4290,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 @@ -4560,7 +4301,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -4584,7 +4324,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -4598,7 +4337,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -4611,8 +4349,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4626,7 +4362,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -4640,7 +4375,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4653,7 +4387,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4666,7 +4399,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4680,7 +4412,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4693,7 +4424,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4706,8 +4436,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4720,7 +4448,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -4744,7 +4471,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -4758,7 +4484,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -4771,8 +4496,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -4786,7 +4509,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -4800,7 +4522,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4813,7 +4534,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4826,7 +4546,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4840,7 +4559,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -4853,7 +4571,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -4866,8 +4583,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -4880,7 +4595,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5198,7 +4912,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5212,7 +4925,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5225,8 +4937,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5240,7 +4950,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5254,7 +4963,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5267,7 +4975,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5280,7 +4987,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5294,7 +5000,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5307,7 +5012,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5320,8 +5024,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5334,7 +5036,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5358,7 +5059,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5372,7 +5072,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5385,8 +5084,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5400,7 +5097,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5414,7 +5110,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5427,7 +5122,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5440,7 +5134,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5454,7 +5147,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5467,7 +5159,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5480,8 +5171,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5494,7 +5183,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5518,7 +5206,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5532,7 +5219,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5545,8 +5231,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5560,7 +5244,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5574,7 +5257,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5587,7 +5269,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5600,7 +5281,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5614,7 +5294,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5627,7 +5306,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5640,8 +5318,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5654,7 +5330,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5678,7 +5353,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5692,7 +5366,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5705,8 +5378,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5720,7 +5391,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5734,7 +5404,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5747,7 +5416,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5760,7 +5428,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5774,7 +5441,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5787,7 +5453,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5800,8 +5465,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5814,7 +5477,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5838,7 +5500,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -5852,7 +5513,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -5865,8 +5525,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -5880,7 +5538,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -5894,7 +5551,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -5907,7 +5563,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5920,7 +5575,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5934,7 +5588,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -5947,7 +5600,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -5960,8 +5612,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -5974,7 +5624,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -5998,7 +5647,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -6012,7 +5660,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -6025,8 +5672,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6040,7 +5685,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -6054,7 +5698,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6067,7 +5710,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6080,7 +5722,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6094,7 +5735,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6107,7 +5747,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6120,8 +5759,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6134,7 +5771,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -6158,7 +5794,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -6172,7 +5807,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -6185,8 +5819,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6200,7 +5832,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -6214,7 +5845,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6227,7 +5857,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6240,7 +5869,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6254,7 +5882,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6267,7 +5894,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6280,8 +5906,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6294,7 +5918,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -6318,7 +5941,6 @@ ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -6332,7 +5954,6 @@ ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: ds_write_b32 v0, v1 @@ -6345,8 +5966,6 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv @@ -6360,7 +5979,6 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -6374,7 +5992,6 @@ ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 -; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6387,7 +6004,6 @@ ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6400,7 +6016,6 @@ ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6414,7 +6029,6 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 @@ -6427,7 +6041,6 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX940-TGSPLIT-NEXT: buffer_inv sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) @@ -6440,8 +6053,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv @@ -6454,7 +6065,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: ds_store_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir @@ -5,9 +5,9 @@ # GCN-LABEL: name: multiple_mem_operands # GCN-LABEL: bb.3: -# GCN: S_WAITCNT 3952 +# GCN: S_WAITCNT_soft 3952 # GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN -# GCN-NEXT: S_WAITCNT 3952 +# GCN-NEXT: S_WAITCNT_soft 3952 # GCN-NEXT: BUFFER_WBINVL1_VOL name: multiple_mem_operands diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll --- a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll +++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll @@ -38,7 +38,6 @@ ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: v_mov_b32_e32 v6, v4 ; GCN-NEXT: v_mov_b32_e32 v5, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: buffer_atomic_cmpswap v[5:6], v[1:2], s[8:11], 0 addr64 offset:400 glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_wbinvl1 @@ -98,7 +97,6 @@ ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: v_mov_b32_e32 v6, v4 ; GCN-NEXT: v_mov_b32_e32 v5, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: buffer_atomic_cmpswap v[5:6], v[1:2], s[4:7], 0 addr64 offset:400 glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_wbinvl1 diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll --- a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll @@ -33,7 +33,6 @@ ; GCN-NEXT: s_mov_b32 s7, s11 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_atomic_smax v0, v[1:2], s[8:11], 0 addr64 offset:400 glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -76,7 +75,6 @@ ; GCN-NEXT: s_mov_b32 s5, s6 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_atomic_smax v0, v[1:2], s[4:7], 0 addr64 offset:400 ; GCN-NEXT: .LBB1_2: ; %exit ; GCN-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/preserve-user-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/preserve-user-waitcnt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/preserve-user-waitcnt.ll @@ -0,0 +1,53 @@ +; RUN: llc -O3 -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s + +; SIInsertWaitcnts should preserve waitcnt instructions coming from the user + +; CHECK-LABEL: test_waitcnt_asm +; CHECK: s_waitcnt vmcnt(0) +; CHECK: s_waitcnt vmcnt(0) +; CHECK: s_waitcnt vmcnt(0) +; CHECK-NOT: s_waitcnt +; CHECK: s_endpgm +define amdgpu_kernel void @test_waitcnt_asm() { + call void asm sideeffect "s_waitcnt vmcnt(0)", ""() + call void asm sideeffect "s_waitcnt vmcnt(0)", ""() + call void asm sideeffect "s_waitcnt vmcnt(0)", ""() + ret void +} + +; CHECK-LABEL: test_waitcnt_vscnt_asm +; CHECK: s_waitcnt_vscnt null, 0x0 +; CHECK: s_waitcnt_vscnt null, 0x0 +; CHECK: s_waitcnt_vscnt null, 0x0 +; CHECK-NOT: s_waitcnt +; CHECK: s_endpgm +define amdgpu_kernel void @test_waitcnt_vscnt_asm() { + call void asm sideeffect "s_waitcnt_vscnt null, 0x0", ""() + call void asm sideeffect "s_waitcnt_vscnt null, 0x0", ""() + call void asm sideeffect "s_waitcnt_vscnt null, 0x0", ""() + ret void +} + +; CHECK-LABEL: test_waitcnt_builtin +; CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NOT: s_waitcnt +; CHECK: s_endpgm +define amdgpu_kernel void @test_waitcnt_builtin() { + call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +; CHECK-LABEL: test_waitcnt_builtin_non_kernel +; CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NOT: s_waitcnt +; CHECK: s_setpc +define void @test_waitcnt_builtin_non_kernel() { + call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +declare void @llvm.amdgcn.s.waitcnt(i32) diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir --- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir +++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir @@ -26,18 +26,18 @@ --- name: tbuffer_store1 +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; OPT-LABEL: name: tbuffer_store1 - ; OPT: S_WAITCNT 0 - ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec + ; OPT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec ; OPT-NEXT: S_NOP 0 ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; OPT-NEXT: S_ENDPGM 0 ; ; NOOPT-LABEL: name: tbuffer_store1 - ; NOOPT: S_WAITCNT 0 - ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec + ; NOOPT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec ; NOOPT-NEXT: S_ENDPGM 0 TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec S_ENDPGM 0 @@ -45,18 +45,18 @@ --- name: tbuffer_store2 +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; OPT-LABEL: name: tbuffer_store2 - ; OPT: S_WAITCNT 0 - ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; OPT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; OPT-NEXT: S_NOP 0 ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; OPT-NEXT: S_ENDPGM 0 ; ; NOOPT-LABEL: name: tbuffer_store2 - ; NOOPT: S_WAITCNT 0 - ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) + ; NOOPT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) ; NOOPT-NEXT: S_ENDPGM 0 TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) S_ENDPGM 0 @@ -64,11 +64,12 @@ --- name: flat_store +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; CHECK-LABEL: name: flat_store - ; CHECK: S_WAITCNT 0 - ; CHECK-NEXT: FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr + ; CHECK: FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr ; CHECK-NEXT: S_ENDPGM 0 FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 @@ -76,19 +77,19 @@ --- name: global_store +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; OPT-LABEL: name: global_store - ; OPT: S_WAITCNT 0 - ; OPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec + ; OPT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec ; OPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; OPT-NEXT: S_NOP 0 ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; OPT-NEXT: S_ENDPGM 0 ; ; NOOPT-LABEL: name: global_store - ; NOOPT: S_WAITCNT 0 - ; NOOPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec + ; NOOPT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec ; NOOPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; NOOPT-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec @@ -98,18 +99,18 @@ --- name: buffer_store_format +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; OPT-LABEL: name: buffer_store_format - ; OPT: S_WAITCNT 0 - ; OPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec + ; OPT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec ; OPT-NEXT: S_NOP 0 ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; OPT-NEXT: S_ENDPGM 0 ; ; NOOPT-LABEL: name: buffer_store_format - ; NOOPT: S_WAITCNT 0 - ; NOOPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec + ; NOOPT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec ; NOOPT-NEXT: S_ENDPGM 0 BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -117,12 +118,13 @@ --- name: ds_write_b32 +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; CHECK-LABEL: name: ds_write_b32 ; CHECK: renamable $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: renamable $vgpr1 = IMPLICIT_DEF - ; CHECK-NEXT: S_WAITCNT 0 ; CHECK-NEXT: DS_WRITE_B32_gfx9 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 renamable $vgpr0 = IMPLICIT_DEF @@ -133,6 +135,8 @@ ... --- name: global_store_dword +machineFunctionInfo: + isEntryFunction: true body: | bb.0: liveins: $vgpr0, $sgpr0_sgpr1 @@ -140,7 +144,6 @@ ; OPT-LABEL: name: global_store_dword ; OPT: liveins: $vgpr0, $sgpr0_sgpr1 ; OPT-NEXT: {{ $}} - ; OPT-NEXT: S_WAITCNT 0 ; OPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec ; OPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec ; OPT-NEXT: S_NOP 0 @@ -150,7 +153,6 @@ ; NOOPT-LABEL: name: global_store_dword ; NOOPT: liveins: $vgpr0, $sgpr0_sgpr1 ; NOOPT-NEXT: {{ $}} - ; NOOPT-NEXT: S_WAITCNT 0 ; NOOPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec ; NOOPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec ; NOOPT-NEXT: S_ENDPGM 0 @@ -161,12 +163,13 @@ --- name: multiple_basic_blocks1 +machineFunctionInfo: + isEntryFunction: true body: | ; CHECK-LABEL: name: multiple_basic_blocks1 ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_WAITCNT 0 ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} @@ -205,12 +208,13 @@ ... --- name: multiple_basic_blocks2 +machineFunctionInfo: + isEntryFunction: true body: | ; OPT-LABEL: name: multiple_basic_blocks2 ; OPT: bb.0: ; OPT-NEXT: successors: %bb.2(0x80000000) ; OPT-NEXT: {{ $}} - ; OPT-NEXT: S_WAITCNT 0 ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec ; OPT-NEXT: S_BRANCH %bb.2 @@ -231,7 +235,6 @@ ; NOOPT: bb.0: ; NOOPT-NEXT: successors: %bb.2(0x80000000) ; NOOPT-NEXT: {{ $}} - ; NOOPT-NEXT: S_WAITCNT 0 ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec ; NOOPT-NEXT: S_BRANCH %bb.2 @@ -267,12 +270,13 @@ # One parent block has a VMEM store, release VGPRs --- name: multiple_basic_blocks3 +machineFunctionInfo: + isEntryFunction: true body: | ; OPT-LABEL: name: multiple_basic_blocks3 ; OPT: bb.0: ; OPT-NEXT: successors: %bb.2(0x80000000) ; OPT-NEXT: {{ $}} - ; OPT-NEXT: S_WAITCNT 0 ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec ; OPT-NEXT: S_BRANCH %bb.2 @@ -303,7 +307,6 @@ ; NOOPT: bb.0: ; NOOPT-NEXT: successors: %bb.2(0x80000000) ; NOOPT-NEXT: {{ $}} - ; NOOPT-NEXT: S_WAITCNT 0 ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec ; NOOPT-NEXT: S_BRANCH %bb.2 @@ -357,12 +360,13 @@ --- name: recursive_loop +machineFunctionInfo: + isEntryFunction: true body: | ; CHECK-LABEL: name: recursive_loop ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_WAITCNT 0 ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} @@ -394,12 +398,13 @@ --- name: recursive_loop_vmem +machineFunctionInfo: + isEntryFunction: true body: | ; OPT-LABEL: name: recursive_loop_vmem ; OPT: bb.0: ; OPT-NEXT: successors: %bb.1(0x80000000) ; OPT-NEXT: {{ $}} - ; OPT-NEXT: S_WAITCNT 0 ; OPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec ; OPT-NEXT: S_BRANCH %bb.1 ; OPT-NEXT: {{ $}} @@ -421,7 +426,6 @@ ; NOOPT: bb.0: ; NOOPT-NEXT: successors: %bb.1(0x80000000) ; NOOPT-NEXT: {{ $}} - ; NOOPT-NEXT: S_WAITCNT 0 ; NOOPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec ; NOOPT-NEXT: S_BRANCH %bb.1 ; NOOPT-NEXT: {{ $}} @@ -456,18 +460,18 @@ --- name: image_store +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; OPT-LABEL: name: image_store - ; OPT: S_WAITCNT 0 - ; OPT-NEXT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) + ; OPT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) ; OPT-NEXT: S_NOP 0 ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; OPT-NEXT: S_ENDPGM 0 ; ; NOOPT-LABEL: name: image_store - ; NOOPT: S_WAITCNT 0 - ; NOOPT-NEXT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) + ; NOOPT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) ; NOOPT-NEXT: S_ENDPGM 0 IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) S_ENDPGM 0 @@ -475,11 +479,12 @@ --- name: scratch_store +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; CHECK-LABEL: name: scratch_store - ; CHECK: S_WAITCNT 0 - ; CHECK-NEXT: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc + ; CHECK: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr ; CHECK-NEXT: S_ENDPGM 0 renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc @@ -489,18 +494,18 @@ --- name: buffer_atomic +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; OPT-LABEL: name: buffer_atomic - ; OPT: S_WAITCNT 0 - ; OPT-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; OPT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; OPT-NEXT: S_NOP 0 ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; OPT-NEXT: S_ENDPGM 0 ; ; NOOPT-LABEL: name: buffer_atomic - ; NOOPT: S_WAITCNT 0 - ; NOOPT-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) + ; NOOPT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) ; NOOPT-NEXT: S_ENDPGM 0 BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) S_ENDPGM 0 @@ -508,11 +513,12 @@ --- name: flat_atomic +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; CHECK-LABEL: name: flat_atomic - ; CHECK: S_WAITCNT 0 - ; CHECK-NEXT: renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr + ; CHECK: renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr ; CHECK-NEXT: S_ENDPGM 0 renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr S_ENDPGM 0 @@ -521,11 +527,12 @@ --- name: global_atomic +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; CHECK-LABEL: name: global_atomic - ; CHECK: S_WAITCNT 0 - ; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec + ; CHECK: renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec S_ENDPGM 0 @@ -533,11 +540,12 @@ --- name: image_atomic +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; CHECK-LABEL: name: image_atomic - ; CHECK: S_WAITCNT 0 - ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; CHECK-NEXT: S_ENDPGM 0 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) S_ENDPGM 0 @@ -545,11 +553,12 @@ --- name: global_store_optnone +machineFunctionInfo: + isEntryFunction: true body: | bb.0: ; CHECK-LABEL: name: global_store_optnone - ; CHECK: S_WAITCNT 0 - ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec + ; CHECK: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; CHECK-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll --- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll +++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll @@ -7,7 +7,6 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_lshlrev_b64 v[0:1], 2, v[4:5] ; GCN-NEXT: v_mov_b32_e32 v6, 3 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: global_atomic_and v[0:1], v6, off offset:512 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_wbinvl1_vol diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll --- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -26,7 +26,7 @@ ; GCN: successors: ; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN: S_WAITCNT 3952 + ; GCN: S_WAITCNT_soft 3952 ; GCN: bb.3: entry: %cc = icmp sgt i32 %a, 0 @@ -63,7 +63,7 @@ ; GCN: successors: ; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN: S_WAITCNT 3952 + ; GCN: S_WAITCNT_soft 3952 ; GCN: bb.5: entry: %cc = icmp sgt i32 %a, 0 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll --- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll @@ -57,8 +57,9 @@ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[4:5] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 +; GFX11-NEXT: s_waitcnt expcnt(0) ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: buffer_store_b32 v0, v0, s[4:7], 0 offen ; GFX11-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir @@ -5,8 +5,7 @@ ... # CHECK-LABEL: name: waitcnt-no-redundant -# CHECK: S_WAITCNT 3952 -# CHECK-NEXT: FLAT_ATOMIC_CMPSWAP +# CHECK: FLAT_ATOMIC_CMPSWAP # CHECK-NEXT: S_WAITCNT 3952 # CHECK-NEXT: BUFFER_WBINVL1 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir @@ -19,6 +19,7 @@ ; GFX10-NEXT: S_WAITCNT 112 ; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX10-NEXT: S_ENDPGM 0 + ; ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_unmodified ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -31,7 +32,7 @@ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - S_WAITCNT_VSCNT undef $sgpr_null, 0 + S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 S_BARRIER $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr @@ -55,6 +56,7 @@ ; GFX10-NEXT: S_WAITCNT 112 ; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX10-NEXT: S_ENDPGM 0 + ; ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_needs_vscnt ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -67,7 +69,7 @@ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - S_WAITCNT_VSCNT undef $sgpr_null, 1 + S_WAITCNT_VSCNT_soft undef $sgpr_null, 1 S_BARRIER $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr @@ -85,19 +87,18 @@ ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 ; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - ; GFX10-NEXT: S_WAITCNT 112 ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10-NEXT: S_BARRIER ; GFX10-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX10-NEXT: S_WAITCNT 112 ; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX10-NEXT: S_ENDPGM 0 + ; ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_with_other_waitcnt ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT 112 ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: S_BARRIER ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr @@ -105,8 +106,8 @@ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - S_WAITCNT 112 - S_WAITCNT_VSCNT undef $sgpr_null, 0 + S_WAITCNT_soft 112 + S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 S_BARRIER $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr @@ -130,6 +131,7 @@ ; GFX10-NEXT: S_WAITCNT 112 ; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX10-NEXT: S_ENDPGM 0 + ; ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_combined ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -142,9 +144,9 @@ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - S_WAITCNT_VSCNT undef $sgpr_null, 0 - S_WAITCNT_VSCNT undef $sgpr_null, 1 - S_WAITCNT_VSCNT undef $sgpr_null, 2 + S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 + S_WAITCNT_VSCNT_soft undef $sgpr_null, 1 + S_WAITCNT_VSCNT_soft undef $sgpr_null, 2 S_BARRIER $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr @@ -162,19 +164,18 @@ ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: S_WAITCNT 0 ; GFX10-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - ; GFX10-NEXT: S_WAITCNT 0 ; GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 1 ; GFX10-NEXT: S_BARRIER ; GFX10-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX10-NEXT: S_WAITCNT 112 ; GFX10-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX10-NEXT: S_ENDPGM 0 + ; ; GFX11-LABEL: name: test_waitcnt_preexisting_vscnt_combined_both_types ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: S_WAITCNT 0 ; GFX11-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT 0 ; GFX11-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX11-NEXT: S_BARRIER ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr @@ -182,11 +183,11 @@ ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - S_WAITCNT 0 - S_WAITCNT_VSCNT undef $sgpr_null, 1 - S_WAITCNT 0 - S_WAITCNT_VSCNT undef $sgpr_null, 2 - S_WAITCNT 0 + S_WAITCNT_soft 0 + S_WAITCNT_VSCNT_soft undef $sgpr_null, 1 + S_WAITCNT_soft 0 + S_WAITCNT_VSCNT_soft undef $sgpr_null, 2 + S_WAITCNT_soft 0 S_BARRIER $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir @@ -13,7 +13,7 @@ bb.0: liveins: $sgpr0_sgpr1 $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s32) from `ptr addrspace(4) undef`) - S_WAITCNT_VSCNT undef $sgpr_null, 0 + S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 $vgpr0 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32), addrspace 1) S_CMP_LG_U32 killed $sgpr4, 0, implicit-def $scc ... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir @@ -181,7 +181,7 @@ # CHECK-LABEL: name: preexisting_waitcnt{{$}} # CHECK: FLAT_LOAD_DWORD -# CHECK-NEXT: S_WAITCNT 0 +# CHECK-NEXT: S_WAITCNT 0 # CHECK-NOT: S_WAITCNT name: preexisting_waitcnt tracksRegLiveness: true @@ -226,7 +226,7 @@ # See the waitcnt inside the bundle and don't insert an extra # CHECK-LABEL: name: preexisting_waitcnt_in_bundle{{$}} # CHECK: FLAT_LOAD_DWORD -# CHECK: S_WAITCNT 0 +# CHECK: S_WAITCNT 0 # CHECK-NOT: S_WAITCNT name: preexisting_waitcnt_in_bundle tracksRegLiveness: true