diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -88,19 +88,20 @@ }; enum WaitEventType { - VMEM_ACCESS, // vector-memory read & write - VMEM_READ_ACCESS, // vector-memory read - VMEM_WRITE_ACCESS, // vector-memory write - LDS_ACCESS, // lds read & write - GDS_ACCESS, // gds read & write - SQ_MESSAGE, // send message - SMEM_ACCESS, // scalar-memory read & write - EXP_GPR_LOCK, // export holding on its data src - GDS_GPR_LOCK, // GDS holding on its data and addr src - EXP_POS_ACCESS, // write to export position - EXP_PARAM_ACCESS, // write to export parameter - VMW_GPR_LOCK, // vector-memory write holding on its data src - EXP_LDS_ACCESS, // read by ldsdir counting as export + VMEM_ACCESS, // vector-memory read & write + VMEM_READ_ACCESS, // vector-memory read + VMEM_WRITE_ACCESS, // vector-memory write that is not scratch + SCRATCH_WRITE_ACCESS, // vector-memory write that may be scratch + LDS_ACCESS, // lds read & write + GDS_ACCESS, // gds read & write + SQ_MESSAGE, // send message + SMEM_ACCESS, // scalar-memory read & write + EXP_GPR_LOCK, // export holding on its data src + GDS_GPR_LOCK, // GDS holding on its data and addr src + EXP_POS_ACCESS, // write to export position + EXP_PARAM_ACCESS, // write to export parameter + VMW_GPR_LOCK, // vector-memory write holding on its data src + EXP_LDS_ACCESS, // read by ldsdir counting as export NUM_WAIT_EVENTS, }; @@ -110,7 +111,7 @@ (1 << SQ_MESSAGE), (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) | (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS) | (1 << EXP_LDS_ACCESS), - (1 << VMEM_WRITE_ACCESS)}; + (1 << VMEM_WRITE_ACCESS) | (1 << SCRATCH_WRITE_ACCESS)}; // The mapping is: // 0 .. 
SQ_MAX_PGM_VGPRS-1 real VGPRs @@ -460,13 +461,19 @@ assert(SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLAT(Inst)); if (!ST->hasVscnt()) return VMEM_ACCESS; - if (Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst)) + if (Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst)) { + // FLAT and SCRATCH instructions may access scratch. Other VMEM + // instructions do not. + if (SIInstrInfo::isFLAT(Inst) && mayAccessScratchThroughFlat(Inst)) + return SCRATCH_WRITE_ACCESS; return VMEM_WRITE_ACCESS; + } return VMEM_READ_ACCESS; } bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const; bool mayAccessLDSThroughFlat(const MachineInstr &MI) const; + bool mayAccessScratchThroughFlat(const MachineInstr &MI) const; bool generateWaitcntInstBefore(MachineInstr &MI, WaitcntBrackets &ScoreBrackets, MachineInstr *OldWaitcntInstr, @@ -1038,11 +1045,13 @@ } // Identify S_ENDPGM instructions which may have to wait for outstanding VMEM // stores. In this case it can be useful to send a message to explicitly - // release all VGPRs before the stores have completed. + // release all VGPRs before the stores have completed, but it is only safe to + // do this if there are no outstanding scratch stores. else if (MI.getOpcode() == AMDGPU::S_ENDPGM || MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) { if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && - ScoreBrackets.getScoreRange(VS_CNT) != 0) + ScoreBrackets.getScoreRange(VS_CNT) != 0 && + !ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS)) ReleaseVGPRInsts.insert(&MI); } // Resolve vm waits before gs-done. @@ -1398,6 +1407,32 @@ return false; } +// This is a flat memory operation. Check to see if it has memory tokens for +// either scratch or FLAT. +bool SIInsertWaitcnts::mayAccessScratchThroughFlat( + const MachineInstr &MI) const { + assert(TII->isFLAT(MI)); + + // SCRATCH instructions always access scratch. + if (TII->isFLATScratch(MI)) + return true; + + // GLOBAL instructions never access scratch. 
+ if (TII->isFLATGlobal(MI)) + return false; + + // If there are no memory operands then conservatively assume the flat + // operation may access scratch. + if (MI.memoperands_empty()) + return true; + + // See if any memory operand specifies an address space that involves scratch. + return any_of(MI.memoperands(), [](const MachineMemOperand *Memop) { + unsigned AS = Memop->getAddrSpace(); + return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS; + }); +} + void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, WaitcntBrackets *ScoreBrackets) { // Now look at the instruction opcode. If it is a memory access diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll @@ -54,7 +54,6 @@ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_u32 s0, s32, s0 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %alloca = alloca i32, i32 %n, align 4, addrspace(5) store i32 0, ptr addrspace(5) %alloca @@ -194,7 +193,6 @@ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_u32 s0, s32, s0 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %alloca = alloca i32, i32 %n, align 16, addrspace(5) store i32 0, ptr addrspace(5) %alloca @@ -337,7 +335,6 @@ ; GFX11-NEXT: s_add_u32 s0, s32, s0 ; GFX11-NEXT: s_and_b32 s0, s0, 0xfffffc00 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %alloca = alloca i32, i32 %n, align 32, addrspace(5) store i32 0, ptr addrspace(5) %alloca diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll @@ -1471,7 +1471,6 @@ ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) store i32 %result, ptr %out @@ -1551,7 +1550,6 @@ ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) @@ -1577,7 +1575,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) ret void @@ -1637,7 +1634,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:16 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) @@ -1743,7 +1739,6 @@ ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, ptr %ptr, i32 %id @@ -1824,7 +1819,6 @@ ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, 
vcc_lo ; GFX11-NEXT: v_mov_b32_e32 v2, 42 ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:20 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, ptr %ptr, i32 %id @@ -2005,7 +1999,6 @@ ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) store i64 %result, ptr %out @@ -2100,7 +2093,6 @@ ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) @@ -2128,7 +2120,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) ret void @@ -2193,7 +2184,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr i64, ptr %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) @@ -2311,7 +2301,6 @@ ; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 
@llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, ptr %ptr, i32 %id @@ -2397,7 +2386,6 @@ ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3] offset:40 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, ptr %ptr, i32 %id diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -698,7 +698,6 @@ ; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v9, v10, v[0:2], v[3:5], v[6:8]], s[4:7] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b128 v[0:1], v[0:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() %gep_node_ptr = getelementptr inbounds i32, ptr %p_node_ptr, i32 %lid @@ -802,7 +801,6 @@ ; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v6, v7, v[0:2], v[3:5]], s[4:7] a16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b128 v[0:1], v[0:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() %gep_node_ptr = getelementptr inbounds i32, ptr %p_node_ptr, i32 %lid @@ -916,7 +914,6 @@ ; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[9:10], v11, v[0:2], v[3:5], v[6:8]], s[0:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b128 v[0:1], v[0:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid @@ -1016,7 +1013,6 @@ ; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[6:7], v8, v[0:2], v[3:5]], s[0:3] a16 ; GFX11-NEXT: s_waitcnt vmcnt(0) 
; GFX11-NEXT: flat_store_b128 v[0:1], v[0:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll @@ -93,7 +93,6 @@ ; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[2:3], s0 ; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b) store volatile double %result, ptr undef @@ -126,7 +125,6 @@ ; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7 ; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7) store volatile double %result, ptr undef diff --git a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll --- a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll +++ b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll @@ -87,7 +87,6 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; GFX11-NEXT: .LBB2_2: ; %then ; GFX11-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll --- a/llvm/test/CodeGen/AMDGPU/cc-update.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll @@ -57,7 +57,6 @@ ; GFX1100-NEXT: v_mov_b32_e32 v0, 0 ; GFX1100-NEXT: scratch_store_b32 off, v0, off offset:4 dlc ; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 -; 
GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm entry: %x = alloca i32, align 4, addrspace(5) @@ -309,7 +308,6 @@ ; GFX1100-NEXT: s_mov_b32 s33, 0 ; GFX1100-NEXT: scratch_store_b32 off, v0, s33 offset:4 dlc ; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm entry: %x = alloca i32, align 4, addrspace(5) @@ -585,7 +583,6 @@ ; GFX1100-NEXT: s_waitcnt vmcnt(0) ; GFX1100-NEXT: scratch_store_b32 off, v0, off offset:8 dlc ; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm entry: ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll --- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll +++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll @@ -113,7 +113,6 @@ ; GFX11-NEXT: flat_store_b32 v[0:1], v4 offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3) ; GFX11-NEXT: flat_store_b32 v[0:1], v5 offset:24 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %ld0 = load i32, ptr %lb @@ -243,7 +242,6 @@ ; GFX11-NEXT: flat_store_b32 v[0:1], v4 offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3) ; GFX11-NEXT: flat_store_b32 v[0:1], v5 offset:24 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %ld0 = load i32, ptr %lb diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll @@ -23,7 +23,6 @@ ; GFX11-NEXT: scratch_load_u8 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr inbounds i8, ptr addrspace(5) %in, i32 1 %load = load i8, ptr addrspace(5) %gep, align 4 @@ -51,7 
+50,6 @@ ; GFX11-NEXT: scratch_load_i8 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr inbounds i8, ptr addrspace(5) %in, i32 1 %load = load i8, ptr addrspace(5) %gep, align 4 @@ -79,7 +77,6 @@ ; GFX11-NEXT: scratch_load_u16 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr inbounds i16, ptr addrspace(5) %in, i32 1 %load = load i16, ptr addrspace(5) %gep, align 4 @@ -107,7 +104,6 @@ ; GFX11-NEXT: scratch_load_i16 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr inbounds i16, ptr addrspace(5) %in, i32 1 %load = load i16, ptr addrspace(5) %gep, align 4 @@ -136,7 +132,6 @@ ; GFX11-NEXT: scratch_load_d16_u8 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i8, ptr addrspace(5) %in, i64 1 @@ -167,7 +162,6 @@ ; GFX11-NEXT: scratch_load_d16_i8 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i8, ptr addrspace(5) %in, i64 1 @@ -198,7 +192,6 @@ ; GFX11-NEXT: scratch_load_d16_b16 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i16, ptr addrspace(5) %in, i64 1 @@ -229,7 +222,6 @@ ; GFX11-NEXT: scratch_load_d16_hi_u8 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = 
getelementptr i8, ptr addrspace(5) %in, i64 1 @@ -260,7 +252,6 @@ ; GFX11-NEXT: scratch_load_d16_hi_i8 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i8, ptr addrspace(5) %in, i64 1 @@ -291,7 +282,6 @@ ; GFX11-NEXT: scratch_load_d16_hi_b16 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i16, ptr addrspace(5) %in, i64 1 @@ -320,7 +310,6 @@ ; GFX11-NEXT: v_add_nc_u32_e32 v1, 4, v2 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_store_d16_hi_b8 v1, v0, off -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %load = load <4 x i8>, ptr %in @@ -349,7 +338,6 @@ ; GFX11-NEXT: v_add_nc_u32_e32 v1, 2, v2 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_store_d16_hi_b16 v1, v0, off -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %load = load <2 x i16>, ptr %in @@ -383,7 +371,6 @@ ; GFX11-NEXT: scratch_load_u8 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr inbounds i8, ptr addrspace(5) %in, i32 1 %load = load i8, ptr addrspace(5) %gep, align 4 @@ -411,7 +398,6 @@ ; GFX11-NEXT: scratch_load_i8 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr inbounds i8, ptr addrspace(5) %in, i32 1 %load = load i8, ptr addrspace(5) %gep, align 4 @@ -439,7 +425,6 @@ ; GFX11-NEXT: scratch_load_u16 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr 
inbounds i16, ptr addrspace(5) %in, i32 1 %load = load i16, ptr addrspace(5) %gep, align 4 @@ -467,7 +452,6 @@ ; GFX11-NEXT: scratch_load_i16 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr inbounds i16, ptr addrspace(5) %in, i32 1 %load = load i16, ptr addrspace(5) %gep, align 4 @@ -497,7 +481,6 @@ ; GFX11-NEXT: scratch_load_d16_u8 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i8, ptr addrspace(5) %in, i64 1 @@ -529,7 +512,6 @@ ; GFX11-NEXT: scratch_load_d16_i8 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i8, ptr addrspace(5) %in, i64 1 @@ -561,7 +543,6 @@ ; GFX11-NEXT: scratch_load_d16_b16 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i16, ptr addrspace(5) %in, i64 1 @@ -593,7 +574,6 @@ ; GFX11-NEXT: scratch_load_d16_hi_u8 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i8, ptr addrspace(5) %in, i64 1 @@ -625,7 +605,6 @@ ; GFX11-NEXT: scratch_load_d16_hi_i8 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %gep = getelementptr i8, ptr addrspace(5) %in, i64 1 @@ -657,7 +636,6 @@ ; GFX11-NEXT: scratch_load_d16_hi_b16 v2, off, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: 
%gep = getelementptr i16, ptr addrspace(5) %in, i64 1 @@ -686,7 +664,6 @@ ; GFX11-NEXT: s_add_i32 s0, s0, 4 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_store_d16_hi_b8 off, v0, s0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %load = load <4 x i8>, ptr %in @@ -715,7 +692,6 @@ ; GFX11-NEXT: s_add_i32 s0, s0, 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_store_d16_hi_b16 off, v0, s0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %load = load <2 x i16>, ptr %in @@ -752,7 +728,6 @@ ; GFX11-NEXT: scratch_load_u8 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %voffset4 = mul i32 %voffset, 4 %gep0 = getelementptr inbounds i8, ptr addrspace(5) %in, i32 %voffset4 @@ -785,7 +760,6 @@ ; GFX11-NEXT: scratch_load_i8 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %voffset4 = mul i32 %voffset, 4 %gep0 = getelementptr inbounds i8, ptr addrspace(5) %in, i32 %voffset4 @@ -818,7 +792,6 @@ ; GFX11-NEXT: scratch_load_u16 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %voffset4 = mul i32 %voffset, 4 %gep0 = getelementptr inbounds i8, ptr addrspace(5) %in, i32 %voffset4 @@ -851,7 +824,6 @@ ; GFX11-NEXT: scratch_load_i16 v0, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %voffset4 = mul i32 %voffset, 4 %gep0 = getelementptr inbounds i8, ptr addrspace(5) %in, i32 %voffset4 @@ -885,7 +857,6 @@ ; GFX11-NEXT: scratch_load_d16_u8 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %voffset4 = mul i32 %voffset, 4 @@ -921,7 +892,6 @@ ; GFX11-NEXT: scratch_load_d16_i8 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %voffset4 = mul i32 %voffset, 4 @@ -957,7 +927,6 @@ ; GFX11-NEXT: scratch_load_d16_b16 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %voffset4 = mul i32 %voffset, 4 @@ -993,7 +962,6 @@ ; GFX11-NEXT: scratch_load_d16_hi_u8 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %voffset4 = mul i32 %voffset, 4 @@ -1029,7 +997,6 @@ ; GFX11-NEXT: scratch_load_d16_hi_i8 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %voffset4 = mul i32 %voffset, 4 @@ -1065,7 +1032,6 @@ ; GFX11-NEXT: scratch_load_d16_hi_b16 v3, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b32 v[1:2], v3 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %voffset4 = mul i32 %voffset, 4 @@ -1099,7 +1065,6 @@ ; GFX11-NEXT: v_add3_u32 v1, s0, v1, 4 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_store_d16_hi_b8 v1, v0, off -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %load = load <4 x i8>, ptr %in @@ -1133,7 +1098,6 @@ ; GFX11-NEXT: v_add3_u32 v1, s0, v1, 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: scratch_store_d16_hi_b16 v1, v0, off -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: %load = load <2 x i16>, ptr %in diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll --- 
a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll @@ -67,7 +67,6 @@ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: soff1_voff1: @@ -86,7 +85,6 @@ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm bb: %soff1 = mul i32 %soff, 1 @@ -164,7 +162,6 @@ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: soff1_voff2: @@ -184,7 +181,6 @@ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm bb: %soff1 = mul i32 %soff, 1 @@ -262,7 +258,6 @@ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: soff1_voff4: @@ -282,7 +277,6 @@ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm bb: %soff1 = mul i32 %soff, 1 @@ -361,7 +355,6 @@ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: soff2_voff1: @@ -381,7 +374,6 @@ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm bb: %soff2 = mul i32 %soff, 2 @@ -463,7 +455,6 @@ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: soff2_voff2: @@ -485,7 +476,6 @@ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm bb: %soff2 = mul i32 %soff, 2 @@ -567,7 +557,6 @@ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: soff2_voff4: @@ -589,7 +578,6 @@ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm bb: %soff2 = mul i32 %soff, 2 @@ -668,7 +656,6 @@ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: soff4_voff1: @@ -688,7 +675,6 @@ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm bb: %soff4 = mul i32 %soff, 4 @@ -770,7 +756,6 @@ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: soff4_voff2: @@ -792,7 +777,6 @@ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm bb: %soff4 = mul i32 %soff, 4 @@ -874,7 +858,6 @@ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX11-GISEL-LABEL: soff4_voff4: @@ -896,7 +879,6 @@ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v3, off dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm bb: %soff4 = mul i32 %soff, 4 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -61,7 +61,6 @@ ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:36 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:20 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:4 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: zero_init_kernel: @@ -165,7 +164,6 @@ ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:36 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:20 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:4 -; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; 
GFX11-PAL-NEXT: s_endpgm %alloca = alloca [32 x i16], align 2, addrspace(5) call void @llvm.memset.p5.i64(ptr addrspace(5) align 2 dereferenceable(64) %alloca, i8 0, i64 64, i1 false) @@ -937,7 +935,6 @@ ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:276 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:292 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:308 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: zero_init_small_offset_kernel: @@ -1051,7 +1048,6 @@ ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:276 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:292 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:308 -; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-PAL-NEXT: s_endpgm %padding = alloca [64 x i32], align 4, addrspace(5) %alloca = alloca [32 x i16], align 2, addrspace(5) @@ -1935,7 +1931,6 @@ ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: zero_init_large_offset_kernel: @@ -2054,7 +2049,6 @@ ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:16 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:48 -; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-PAL-NEXT: s_endpgm %padding = alloca [4096 x i32], align 4, addrspace(5) %alloca = alloca [32 x i16], align 2, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll @@ -16,7 +16,6 @@ ; GFX11-LABEL: flat_atomic_fmin_f32_noret: ; GFX11: ; %bb.0: ; GFX11-NEXT: flat_atomic_min_f32 v[0:1], v2 
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %ret = call float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr %ptr, float %data) ret void @@ -31,7 +30,6 @@ ; GFX11-LABEL: flat_atomic_fmax_f32_noret: ; GFX11: ; %bb.0: ; GFX11-NEXT: flat_atomic_max_f32 v[0:1], v2 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %ret = call float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr %ptr, float %data) ret void diff --git a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll --- a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll @@ -173,7 +173,6 @@ ; GFX11-NEXT: ds_min_rtn_f32 v0, v3, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; G_SI-LABEL: lds_ds_fmin: @@ -333,7 +332,6 @@ ; G_GFX11-NEXT: ds_min_rtn_f32 v0, v3, v0 ; G_GFX11-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX11-NEXT: scratch_store_b32 off, v0, s0 -; G_GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; G_GFX11-NEXT: s_endpgm %idx.add = add nuw i32 %idx, 4 %shl0 = shl i32 %idx.add, 3 @@ -501,7 +499,6 @@ ; GFX11-NEXT: ds_max_rtn_f32 v0, v3, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; G_SI-LABEL: lds_ds_fmax: @@ -661,7 +658,6 @@ ; G_GFX11-NEXT: ds_max_rtn_f32 v0, v3, v0 ; G_GFX11-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX11-NEXT: scratch_store_b32 off, v0, s0 -; G_GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; G_GFX11-NEXT: s_endpgm %idx.add = add nuw i32 %idx, 4 %shl0 = shl i32 %idx.add, 3 @@ -860,7 +856,6 @@ ; GFX11-NEXT: ds_min_rtn_f64 v[0:1], v5, v[2:3] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: scratch_store_b64 off, v[0:1], s0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; G_SI-LABEL: lds_ds_fmin_f64: @@ 
-1049,7 +1044,6 @@ ; G_GFX11-NEXT: ds_min_rtn_f64 v[0:1], v5, v[2:3] ; G_GFX11-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX11-NEXT: scratch_store_b64 off, v[0:1], s0 -; G_GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; G_GFX11-NEXT: s_endpgm %idx.add = add nuw i32 %idx, 4 %shl0 = shl i32 %idx.add, 3 @@ -1248,7 +1242,6 @@ ; GFX11-NEXT: ds_max_rtn_f64 v[0:1], v5, v[2:3] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: scratch_store_b64 off, v[0:1], s0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; G_SI-LABEL: lds_ds_fmax_f64: @@ -1437,7 +1430,6 @@ ; G_GFX11-NEXT: ds_max_rtn_f64 v[0:1], v5, v[2:3] ; G_GFX11-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX11-NEXT: scratch_store_b64 off, v[0:1], s0 -; G_GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; G_GFX11-NEXT: s_endpgm %idx.add = add nuw i32 %idx, 4 %shl0 = shl i32 %idx.add, 3 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll @@ -42,7 +42,6 @@ ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) ; SDAG-GFX11-NEXT: v_dot2_bf16_bf16_e64_dpp v0, v2, v0, v1 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SDAG-GFX11-NEXT: scratch_store_b16 off, v0, s0 -; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; SDAG-GFX11-NEXT: s_endpgm ; ; GISEL-GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16_dpp: @@ -55,7 +54,6 @@ ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) ; GISEL-GFX11-NEXT: v_dot2_bf16_bf16_e64_dpp v0, v0, v1, v2 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GISEL-GFX11-NEXT: scratch_store_b16 off, v0, s0 -; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GISEL-GFX11-NEXT: s_endpgm ptr addrspace(5) %r, ptr addrspace(5) %a, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll --- 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll @@ -42,7 +42,6 @@ ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) ; SDAG-GFX11-NEXT: v_dot2_f16_f16_e64_dpp v0, v2, v0, v1 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SDAG-GFX11-NEXT: scratch_store_b16 off, v0, s0 -; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; SDAG-GFX11-NEXT: s_endpgm ; ; GISEL-GFX11-LABEL: test_llvm_amdgcn_fdot2_f16_f16_dpp: @@ -55,7 +54,6 @@ ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) ; GISEL-GFX11-NEXT: v_dot2_f16_f16_e64_dpp v0, v0, v1, v2 quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GISEL-GFX11-NEXT: scratch_store_b16 off, v0, s0 -; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GISEL-GFX11-NEXT: s_endpgm ptr addrspace(5) %r, ptr addrspace(5) %a, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll @@ -236,7 +236,6 @@ ; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v9, v10, v[6:8], v[3:5], v[0:2]], s[4:7] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b128 v[0:1], v[0:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm main_body: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -326,7 +325,6 @@ ; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v6, v7, v[3:5], v[0:2]], s[4:7] a16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b128 v[0:1], v[0:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm main_body: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -422,7 +420,6 @@ ; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[9:10], v11, v[6:8], v[3:5], v[0:2]], s[0:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b128 v[0:1], v[0:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm main_body: 
%lid = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -508,7 +505,6 @@ ; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[6:7], v8, v[3:5], v[0:2]], s[0:3] a16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: flat_store_b128 v[0:1], v[0:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm main_body: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -124,7 +124,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_unordered_load: @@ -136,7 +135,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -259,7 +257,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_monotonic_load: @@ -271,7 +268,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -405,7 +401,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acquire_load: @@ -419,7 +414,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -565,7 +559,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_load: @@ -581,7 +574,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -686,7 +678,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_unordered_store: @@ -698,7 +689,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -802,7 +792,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_monotonic_store: @@ -814,7 +803,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -932,7 +920,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_release_store: @@ -946,7 +933,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1064,7 +1050,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_store: @@ -1078,7 +1063,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1182,7 +1166,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_monotonic_atomicrmw: @@ -1194,7 +1177,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1453,7 +1435,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; 
GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_release_atomicrmw: @@ -1467,7 +1448,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1914,7 +1894,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acquire_ret_atomicrmw: @@ -1930,7 +1909,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2078,7 +2056,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acq_rel_ret_atomicrmw: @@ -2096,7 +2073,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2244,7 +2220,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_ret_atomicrmw: @@ -2262,7 +2237,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2367,7 +2341,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: 
v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_monotonic_monotonic_cmpxchg: @@ -2377,7 +2350,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2636,7 +2608,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_release_monotonic_cmpxchg: @@ -2648,7 +2619,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4619,7 +4589,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: @@ -4631,7 +4600,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4775,7 +4743,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: @@ -4789,7 +4756,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4936,7 +4902,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_release_monotonic_ret_cmpxchg: @@ -4950,7 +4915,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5108,7 +5072,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: @@ -5124,7 +5087,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5282,7 +5244,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: @@ -5298,7 +5259,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; 
GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5442,7 +5402,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: @@ -5456,7 +5415,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5600,7 +5558,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: @@ -5614,7 +5571,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5772,7 +5728,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_release_acquire_ret_cmpxchg: @@ -5788,7 +5743,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5946,7 +5900,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: @@ -5962,7 +5915,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; 
GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6120,7 +6072,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: @@ -6136,7 +6087,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6294,7 +6244,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: @@ -6310,7 +6259,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6468,7 +6416,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: @@ -6484,7 +6431,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6642,7 +6588,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: 
flat_agent_release_seq_cst_ret_cmpxchg: @@ -6658,7 +6603,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6816,7 +6760,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: @@ -6832,7 +6775,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6990,7 +6932,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: @@ -7006,7 +6947,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -7131,7 +7071,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_unordered_load: @@ -7143,7 +7082,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -7266,7 +7204,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: 
v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_monotonic_load: @@ -7278,7 +7215,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -7419,7 +7355,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acquire_load: @@ -7434,7 +7369,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -7587,7 +7521,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_load: @@ -7604,7 +7537,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -7709,7 +7641,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: 
flat_agent_one_as_unordered_store: @@ -7721,7 +7652,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -7825,7 +7755,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_monotonic_store: @@ -7837,7 +7766,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -7955,7 +7883,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_release_store: @@ -7969,7 +7896,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -8087,7 +8013,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_store: @@ -8101,7 +8026,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -8205,7 +8129,6 @@ ; 
GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_monotonic_atomicrmw: @@ -8217,7 +8140,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8472,7 +8394,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_release_atomicrmw: @@ -8486,7 +8407,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8931,7 +8851,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: @@ -8948,7 +8867,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -9102,7 +9020,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: @@ -9121,7 +9038,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; 
GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -9275,7 +9191,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: @@ -9294,7 +9209,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -9399,7 +9313,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg: @@ -9409,7 +9322,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9664,7 +9576,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_release_monotonic_cmpxchg: @@ -9676,7 +9587,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 
%in, i32 %old) { entry: @@ -11599,7 +11509,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: @@ -11611,7 +11520,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11762,7 +11670,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: @@ -11777,7 +11684,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11924,7 +11830,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: @@ -11938,7 +11843,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12103,7 +12007,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: 
flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -12120,7 +12023,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12285,7 +12187,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -12302,7 +12203,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12453,7 +12353,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: @@ -12468,7 +12367,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12619,7 +12517,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: @@ -12634,7 +12531,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12799,7 +12695,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: @@ -12816,7 +12711,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12981,7 +12875,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: @@ -12998,7 +12891,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13163,7 +13055,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: @@ -13180,7 +13071,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13345,7 +13235,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: 
flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: @@ -13362,7 +13251,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13527,7 +13415,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: @@ -13544,7 +13431,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13709,7 +13595,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: @@ -13726,7 +13611,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13891,7 +13775,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: @@ -13908,7 +13791,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -14073,7 +13955,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv 
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: @@ -14090,7 +13971,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll @@ -124,7 +124,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_nontemporal_load_0: @@ -136,7 +135,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -274,7 +272,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s2 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_nontemporal_load_1: @@ -289,7 +286,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s2 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -414,7 +410,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 
:: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 glc slc dlc -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_nontemporal_store_0: @@ -426,7 +421,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 glc slc dlc -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -564,7 +558,6 @@ ; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 glc slc dlc -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_nontemporal_store_1: @@ -579,7 +572,6 @@ ; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 glc slc dlc -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll @@ -124,7 +124,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_unordered_load: @@ -136,7 +135,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm 
ptr %in, ptr %out) { entry: @@ -259,7 +257,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_monotonic_load: @@ -271,7 +268,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -394,7 +390,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acquire_load: @@ -406,7 +401,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -529,7 +523,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_load: @@ -541,7 +534,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -646,7 +638,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; 
GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_unordered_store: @@ -658,7 +649,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -762,7 +752,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_monotonic_store: @@ -774,7 +763,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -878,7 +866,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_release_store: @@ -890,7 +877,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -994,7 +980,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_store: @@ -1006,7 +991,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; 
GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1110,7 +1094,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_monotonic_atomicrmw: @@ -1122,7 +1105,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1226,7 +1208,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acquire_atomicrmw: @@ -1238,7 +1219,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1342,7 +1322,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_release_atomicrmw: @@ -1354,7 +1333,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1458,7 +1436,6 @@ ; GFX11-WGP-NEXT: 
v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acq_rel_atomicrmw: @@ -1470,7 +1447,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1574,7 +1550,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_atomicrmw: @@ -1586,7 +1561,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1708,7 +1682,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acquire_ret_atomicrmw: @@ -1722,7 +1695,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1845,7 +1817,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: 
s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acq_rel_ret_atomicrmw: @@ -1859,7 +1830,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1982,7 +1952,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_ret_atomicrmw: @@ -1996,7 +1965,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2101,7 +2069,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg: @@ -2111,7 +2078,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2216,7 +2182,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: 
flat_singlethread_acquire_monotonic_cmpxchg: @@ -2226,7 +2191,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2331,7 +2295,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_release_monotonic_cmpxchg: @@ -2341,7 +2304,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2446,7 +2408,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg: @@ -2456,7 +2417,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2561,7 +2521,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] 
offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg: @@ -2571,7 +2530,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2676,7 +2634,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_monotonic_acquire_cmpxchg: @@ -2686,7 +2643,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2791,7 +2747,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acquire_acquire_cmpxchg: @@ -2801,7 +2756,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2906,7 +2860,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: 
v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_release_acquire_cmpxchg: @@ -2916,7 +2869,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3021,7 +2973,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg: @@ -3031,7 +2982,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3136,7 +3086,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg: @@ -3146,7 +3095,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ 
-3251,7 +3199,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg: @@ -3261,7 +3208,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3366,7 +3312,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg: @@ -3376,7 +3321,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3481,7 +3425,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_release_seq_cst_cmpxchg: @@ -3491,7 +3434,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3596,7 +3538,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg: @@ -3606,7 +3547,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3711,7 +3651,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg: @@ -3721,7 +3660,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3852,7 +3790,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg: @@ -3864,7 +3801,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; 
GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3997,7 +3933,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg: @@ -4009,7 +3944,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4142,7 +4076,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg: @@ -4154,7 +4087,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4287,7 +4219,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg: @@ -4299,7 +4230,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4432,7 +4362,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg: @@ -4444,7 +4373,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4577,7 +4505,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg: @@ -4589,7 +4516,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4722,7 +4648,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg: @@ -4734,7 +4659,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm 
ptr %out, i32 %in, i32 %old) { entry: @@ -4867,7 +4791,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_release_acquire_ret_cmpxchg: @@ -4879,7 +4802,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5012,7 +4934,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg: @@ -5024,7 +4945,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5157,7 +5077,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg: @@ -5169,7 +5088,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5302,7 +5220,6 @@ ; 
GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg: @@ -5314,7 +5231,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5447,7 +5363,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg: @@ -5459,7 +5374,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5592,7 +5506,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg: @@ -5604,7 +5517,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5737,7 +5649,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] 
offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg: @@ -5749,7 +5660,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5882,7 +5792,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg: @@ -5894,7 +5803,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6019,7 +5927,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_unordered_load: @@ -6031,7 +5938,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6154,7 +6060,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; 
GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_monotonic_load: @@ -6166,7 +6071,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6289,7 +6193,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acquire_load: @@ -6301,7 +6204,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6424,7 +6326,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_load: @@ -6436,7 +6337,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6541,7 +6441,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_unordered_store: @@ -6553,7 +6452,6 @@ ; GFX11-CU-NEXT: 
v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -6657,7 +6555,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_monotonic_store: @@ -6669,7 +6566,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -6773,7 +6669,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_release_store: @@ -6785,7 +6680,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -6889,7 +6783,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_store: @@ -6901,7 +6794,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { 
entry: @@ -7005,7 +6897,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_monotonic_atomicrmw: @@ -7017,7 +6908,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7121,7 +7011,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acquire_atomicrmw: @@ -7133,7 +7022,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7237,7 +7125,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_release_atomicrmw: @@ -7249,7 +7136,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7353,7 +7239,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 
-; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw: @@ -7365,7 +7250,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7469,7 +7353,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw: @@ -7481,7 +7364,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7603,7 +7485,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw: @@ -7617,7 +7498,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7740,7 +7620,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw: @@ -7754,7 +7633,6 @@ ; GFX11-CU-NEXT: 
flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7877,7 +7755,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw: @@ -7891,7 +7768,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7996,7 +7872,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg: @@ -8006,7 +7881,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8111,7 +7985,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg: @@ -8121,7 +7994,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: 
v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8226,7 +8098,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg: @@ -8236,7 +8107,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8341,7 +8211,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg: @@ -8351,7 +8220,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8456,7 +8324,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; 
GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg: @@ -8466,7 +8333,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8571,7 +8437,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg: @@ -8581,7 +8446,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8686,7 +8550,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg: @@ -8696,7 +8559,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8801,7 +8663,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: 
flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg: @@ -8811,7 +8672,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8916,7 +8776,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg: @@ -8926,7 +8785,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9031,7 +8889,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg: @@ -9041,7 +8898,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9146,7 +9002,6 @@ ; GFX11-WGP-NEXT: 
v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg: @@ -9156,7 +9011,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9261,7 +9115,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg: @@ -9271,7 +9124,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9376,7 +9228,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg: @@ -9386,7 +9237,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9491,7 +9341,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg: @@ -9501,7 +9350,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9606,7 +9454,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg: @@ -9616,7 +9463,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9747,7 +9593,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: @@ -9759,7 +9604,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; 
GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9892,7 +9736,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg: @@ -9904,7 +9747,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10037,7 +9879,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg: @@ -10049,7 +9890,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10182,7 +10022,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -10194,7 +10033,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 
v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10327,7 +10165,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -10339,7 +10176,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10472,7 +10308,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg: @@ -10484,7 +10319,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10617,7 +10451,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg: @@ -10629,7 +10462,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10762,7 +10594,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg: @@ -10774,7 +10605,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10907,7 +10737,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: @@ -10919,7 +10748,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11052,7 +10880,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: @@ -11064,7 +10891,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11197,7 +11023,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: @@ -11209,7 +11034,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11342,7 +11166,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: @@ -11354,7 +11177,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11487,7 +11309,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg: @@ -11499,7 +11320,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; 
GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11632,7 +11452,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: @@ -11644,7 +11463,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11777,7 +11595,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: @@ -11789,7 +11606,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -124,7 +124,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_unordered_load: @@ -136,7 +135,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: 
v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -259,7 +257,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_monotonic_load: @@ -271,7 +268,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -407,7 +403,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acquire_load: @@ -421,7 +416,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -569,7 +563,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_load: @@ -585,7 +578,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -690,7 +682,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: 
v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_unordered_store: @@ -702,7 +693,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -806,7 +796,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_monotonic_store: @@ -818,7 +807,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -938,7 +926,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_release_store: @@ -952,7 +939,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1072,7 +1058,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_store: @@ -1086,7 +1071,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; 
GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1190,7 +1174,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_monotonic_atomicrmw: @@ -1202,7 +1185,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1465,7 +1447,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_release_atomicrmw: @@ -1479,7 +1460,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1936,7 +1916,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acquire_ret_atomicrmw: @@ -1952,7 +1931,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2104,7 +2082,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; 
GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acq_rel_ret_atomicrmw: @@ -2122,7 +2099,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2274,7 +2250,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_ret_atomicrmw: @@ -2292,7 +2267,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2397,7 +2371,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_monotonic_monotonic_cmpxchg: @@ -2407,7 +2380,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2670,7 +2642,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_release_monotonic_cmpxchg: @@ -2682,7 +2653,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: 
s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4697,7 +4667,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: @@ -4709,7 +4678,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4855,7 +4823,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: @@ -4869,7 +4836,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5018,7 +4984,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_release_monotonic_ret_cmpxchg: @@ -5032,7 +4997,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, 
i32 %old) { entry: @@ -5194,7 +5158,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: @@ -5210,7 +5173,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5372,7 +5334,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: @@ -5388,7 +5349,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5534,7 +5494,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: @@ -5548,7 +5507,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5694,7 +5652,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acquire_acquire_ret_cmpxchg: @@ -5708,7 +5665,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], 
v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5870,7 +5826,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_release_acquire_ret_cmpxchg: @@ -5886,7 +5841,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6048,7 +6002,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: @@ -6064,7 +6017,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6226,7 +6178,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: @@ -6242,7 +6193,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6404,7 +6354,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: @@ -6420,7 +6369,6 @@ ; 
GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6582,7 +6530,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: @@ -6598,7 +6545,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6760,7 +6706,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_release_seq_cst_ret_cmpxchg: @@ -6776,7 +6721,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6938,7 +6882,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg: @@ -6954,7 +6897,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -7116,7 +7058,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; 
GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg: @@ -7132,7 +7073,6 @@ ; GFX11-CU-NEXT: buffer_gl0_inv ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -7257,7 +7197,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_unordered_load: @@ -7269,7 +7208,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -7392,7 +7330,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_monotonic_load: @@ -7404,7 +7341,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -7547,7 +7483,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acquire_load: @@ -7562,7 +7497,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 
v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -7717,7 +7651,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_load: @@ -7734,7 +7667,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -7839,7 +7771,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_unordered_store: @@ -7851,7 +7782,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -7955,7 +7885,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_monotonic_store: @@ -7967,7 +7896,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -8087,7 +8015,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: 
flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_release_store: @@ -8101,7 +8028,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -8221,7 +8147,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_store: @@ -8235,7 +8160,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -8339,7 +8263,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_monotonic_atomicrmw: @@ -8351,7 +8274,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8610,7 +8532,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_release_atomicrmw: @@ -8624,7 +8545,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 
-; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -9079,7 +8999,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acquire_ret_atomicrmw: @@ -9096,7 +9015,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -9254,7 +9172,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: @@ -9273,7 +9190,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -9431,7 +9347,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: @@ -9450,7 +9365,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -9555,7 +9469,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; 
GFX11-CU-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: @@ -9565,7 +9478,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9824,7 +9736,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_release_monotonic_cmpxchg: @@ -9836,7 +9747,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11803,7 +11713,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: @@ -11815,7 +11724,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11968,7 +11876,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg: @@ -11983,7 +11890,6 @@ ; GFX11-CU-NEXT: 
buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12132,7 +12038,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg: @@ -12146,7 +12051,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12315,7 +12219,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -12332,7 +12235,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12501,7 +12403,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -12518,7 +12419,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12671,7 +12571,6 @@ ; 
GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: @@ -12686,7 +12585,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12839,7 +12737,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: @@ -12854,7 +12751,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13023,7 +12919,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: @@ -13040,7 +12935,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13209,7 +13103,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: @@ -13226,7 +13119,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt 
lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13395,7 +13287,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: @@ -13412,7 +13303,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13581,7 +13471,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: @@ -13598,7 +13487,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13767,7 +13655,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: @@ -13784,7 +13671,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -13953,7 +13839,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg: @@ -13970,7 +13855,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -14139,7 +14023,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg: @@ -14156,7 +14039,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -14325,7 +14207,6 @@ ; GFX11-WGP-NEXT: buffer_gl1_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: @@ -14342,7 +14223,6 @@ ; GFX11-CU-NEXT: buffer_gl1_inv ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll @@ -73,7 +73,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; 
GFX11-CU-LABEL: flat_nontemporal_load_0: @@ -86,7 +85,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -171,7 +169,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s2 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_nontemporal_load_1: @@ -187,7 +184,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -265,7 +261,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 dlc ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_nontemporal_store_0: @@ -278,7 +273,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 dlc ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -363,7 +357,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 dlc ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_nontemporal_store_1: @@ -379,7 +372,6 @@ ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 dlc ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ 
-457,7 +449,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_volatile_workgroup_acquire_load: @@ -470,7 +461,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -542,7 +532,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_volatile_workgroup_release_store: @@ -555,7 +544,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll @@ -124,7 +124,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_unordered_load: @@ -136,7 +135,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -259,7 +257,6 @@ 
; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_monotonic_load: @@ -271,7 +268,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -394,7 +390,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acquire_load: @@ -406,7 +401,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -529,7 +523,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_seq_cst_load: @@ -541,7 +534,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -646,7 +638,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: 
s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_unordered_store: @@ -658,7 +649,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -762,7 +752,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_monotonic_store: @@ -774,7 +763,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -878,7 +866,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_release_store: @@ -890,7 +877,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -994,7 +980,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_seq_cst_store: @@ -1006,7 +991,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1110,7 +1094,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_monotonic_atomicrmw: @@ -1122,7 +1105,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1226,7 +1208,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acquire_atomicrmw: @@ -1238,7 +1219,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1342,7 +1322,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_release_atomicrmw: @@ -1354,7 +1333,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1458,7 +1436,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; 
GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acq_rel_atomicrmw: @@ -1470,7 +1447,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1574,7 +1550,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_seq_cst_atomicrmw: @@ -1586,7 +1561,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1708,7 +1682,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acquire_ret_atomicrmw: @@ -1722,7 +1695,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1845,7 +1817,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acq_rel_ret_atomicrmw: @@ -1859,7 +1830,6 @@ ; 
GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1982,7 +1952,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_seq_cst_ret_atomicrmw: @@ -1996,7 +1965,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2101,7 +2069,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg: @@ -2111,7 +2078,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2216,7 +2182,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acquire_monotonic_cmpxchg: @@ -2226,7 +2191,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; 
GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2331,7 +2295,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_release_monotonic_cmpxchg: @@ -2341,7 +2304,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2446,7 +2408,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg: @@ -2456,7 +2417,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2561,7 +2521,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: 
flat_wavefront_seq_cst_monotonic_cmpxchg: @@ -2571,7 +2530,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2676,7 +2634,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_monotonic_acquire_cmpxchg: @@ -2686,7 +2643,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2791,7 +2747,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acquire_acquire_cmpxchg: @@ -2801,7 +2756,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2906,7 +2860,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; 
GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_release_acquire_cmpxchg: @@ -2916,7 +2869,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3021,7 +2973,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg: @@ -3031,7 +2982,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3136,7 +3086,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg: @@ -3146,7 +3095,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3251,7 +3199,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: 
v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg: @@ -3261,7 +3208,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3366,7 +3312,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg: @@ -3376,7 +3321,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3481,7 +3425,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_release_seq_cst_cmpxchg: @@ -3491,7 +3434,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3596,7 +3538,6 @@ ; 
GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg: @@ -3606,7 +3547,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3711,7 +3651,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg: @@ -3721,7 +3660,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3852,7 +3790,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg: @@ -3864,7 +3801,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 
%old) { entry: @@ -3997,7 +3933,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg: @@ -4009,7 +3944,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4142,7 +4076,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg: @@ -4154,7 +4087,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4287,7 +4219,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg: @@ -4299,7 +4230,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4432,7 +4362,6 @@ ; GFX11-WGP-NEXT: 
flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg: @@ -4444,7 +4373,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4577,7 +4505,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg: @@ -4589,7 +4516,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4722,7 +4648,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg: @@ -4734,7 +4659,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4867,7 +4791,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; 
GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_release_acquire_ret_cmpxchg: @@ -4879,7 +4802,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5012,7 +4934,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg: @@ -5024,7 +4945,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5157,7 +5077,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg: @@ -5169,7 +5088,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5302,7 +5220,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: 
flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg: @@ -5314,7 +5231,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5447,7 +5363,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg: @@ -5459,7 +5374,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5592,7 +5506,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg: @@ -5604,7 +5517,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5737,7 +5649,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg: @@ -5749,7 +5660,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5882,7 +5792,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg: @@ -5894,7 +5803,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6019,7 +5927,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_unordered_load: @@ -6031,7 +5938,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6154,7 +6060,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_monotonic_load: @@ -6166,7 
+6071,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6289,7 +6193,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acquire_load: @@ -6301,7 +6204,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6424,7 +6326,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_load: @@ -6436,7 +6337,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6541,7 +6441,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_unordered_store: @@ -6553,7 +6452,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -6657,7 +6555,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_monotonic_store: @@ -6669,7 +6566,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -6773,7 +6669,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_release_store: @@ -6785,7 +6680,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -6889,7 +6783,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_store: @@ -6901,7 +6794,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -7005,7 +6897,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: 
flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_monotonic_atomicrmw: @@ -7017,7 +6908,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7121,7 +7011,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acquire_atomicrmw: @@ -7133,7 +7022,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7237,7 +7125,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_release_atomicrmw: @@ -7249,7 +7136,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7353,7 +7239,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw: @@ -7365,7 
+7250,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7469,7 +7353,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw: @@ -7481,7 +7364,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7603,7 +7485,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw: @@ -7617,7 +7498,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7740,7 +7620,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw: @@ -7754,7 +7633,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7877,7 +7755,6 @@ ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw: @@ -7891,7 +7768,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7996,7 +7872,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg: @@ -8006,7 +7881,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8111,7 +7985,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg: @@ -8121,7 +7994,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8226,7 +8098,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg: @@ -8236,7 +8107,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8341,7 +8211,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg: @@ -8351,7 +8220,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8456,7 +8324,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg: @@ -8466,7 +8333,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: 
v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8571,7 +8437,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg: @@ -8581,7 +8446,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8686,7 +8550,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg: @@ -8696,7 +8559,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8801,7 +8663,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: 
flat_wavefront_one_as_release_acquire_cmpxchg: @@ -8811,7 +8672,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8916,7 +8776,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg: @@ -8926,7 +8785,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9031,7 +8889,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg: @@ -9041,7 +8898,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9146,7 +9002,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] 
offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg: @@ -9156,7 +9011,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9261,7 +9115,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg: @@ -9271,7 +9124,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9376,7 +9228,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg: @@ -9386,7 +9237,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9491,7 +9341,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; 
GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg: @@ -9501,7 +9350,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9606,7 +9454,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg: @@ -9616,7 +9463,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9747,7 +9593,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: @@ -9759,7 +9604,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9892,7 +9736,6 @@ ; 
GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg: @@ -9904,7 +9747,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10037,7 +9879,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -10049,7 +9890,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10182,7 +10022,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -10194,7 +10033,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10327,7 +10165,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 
v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg: @@ -10339,7 +10176,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10472,7 +10308,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg: @@ -10484,7 +10319,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10617,7 +10451,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg: @@ -10629,7 +10462,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10762,7 +10594,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; 
GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: @@ -10774,7 +10605,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10907,7 +10737,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: @@ -10919,7 +10748,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11052,7 +10880,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: @@ -11064,7 +10891,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11197,7 +11023,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: @@ -11209,7 +11034,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11342,7 +11166,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg: @@ -11354,7 +11177,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11487,7 +11309,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg: @@ -11499,7 +11320,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11632,7 +11452,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: 
flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: @@ -11644,7 +11463,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll @@ -124,7 +124,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_unordered_load: @@ -136,7 +135,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -259,7 +257,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_monotonic_load: @@ -271,7 +268,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -403,7 +399,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; 
GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_acquire_load: @@ -416,7 +411,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -559,7 +553,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_seq_cst_load: @@ -573,7 +566,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -678,7 +670,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_unordered_store: @@ -690,7 +681,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -794,7 +784,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_monotonic_store: @@ -806,7 +795,6 @@ ; GFX11-CU-NEXT: 
v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -921,7 +909,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_release_store: @@ -934,7 +921,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1049,7 +1035,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_seq_cst_store: @@ -1062,7 +1047,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -1166,7 +1150,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_monotonic_atomicrmw: @@ -1178,7 +1161,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1309,7 +1291,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: 
flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1424,7 +1405,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_release_atomicrmw: @@ -1437,7 +1417,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1580,7 +1559,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1723,7 +1701,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -1849,7 +1826,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_acquire_ret_atomicrmw: @@ -1863,7 +1839,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2001,7 +1976,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_acq_rel_ret_atomicrmw: @@ -2016,7 +1990,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2154,7 +2127,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_seq_cst_ret_atomicrmw: @@ -2169,7 +2141,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -2274,7 +2245,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg: @@ -2284,7 +2254,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2414,7 +2383,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2530,7 +2498,6 @@ ; 
GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_release_monotonic_cmpxchg: @@ -2541,7 +2508,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2683,7 +2649,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2825,7 +2790,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -2955,7 +2919,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3085,7 +3048,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3227,7 +3189,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3369,7 +3330,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3511,7 +3471,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3653,7 +3612,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3784,7 +3742,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg: @@ -3796,7 +3753,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -3936,7 +3892,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg: @@ -3948,7 +3903,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4092,7 +4046,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg: @@ -4105,7 +4058,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4256,7 +4208,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg: @@ -4269,7 +4220,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4420,7 +4370,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg: @@ -4433,7 +4382,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm 
ptr %out, i32 %in, i32 %old) { entry: @@ -4573,7 +4521,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg: @@ -4585,7 +4532,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4725,7 +4671,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg: @@ -4737,7 +4682,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -4888,7 +4832,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_release_acquire_ret_cmpxchg: @@ -4901,7 +4844,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5052,7 +4994,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; 
GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg: @@ -5065,7 +5006,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5216,7 +5156,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg: @@ -5229,7 +5168,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5380,7 +5318,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg: @@ -5393,7 +5330,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5544,7 +5480,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg: @@ -5557,7 +5492,6 @@ ; GFX11-CU-NEXT: 
flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5708,7 +5642,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg: @@ -5721,7 +5654,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -5872,7 +5804,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg: @@ -5885,7 +5816,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6036,7 +5966,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg: @@ -6049,7 +5978,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 
; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -6174,7 +6102,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_unordered_load: @@ -6186,7 +6113,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6309,7 +6235,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_monotonic_load: @@ -6321,7 +6246,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6450,7 +6374,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_acquire_load: @@ -6462,7 +6385,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6597,7 +6519,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_seq_cst_load: @@ -6609,7 +6530,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %in, ptr %out) { entry: @@ -6714,7 +6634,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_unordered_store: @@ -6726,7 +6645,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -6830,7 +6748,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_monotonic_store: @@ -6842,7 +6759,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -6952,7 +6868,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_release_store: @@ -6964,7 +6879,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: 
v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -7074,7 +6988,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_seq_cst_store: @@ -7086,7 +6999,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm i32 %in, ptr %out) { entry: @@ -7190,7 +7102,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_monotonic_atomicrmw: @@ -7202,7 +7113,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7325,7 +7235,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7435,7 +7344,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_release_atomicrmw: @@ -7447,7 +7355,6 @@ ; GFX11-CU-NEXT: 
v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7576,7 +7483,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7705,7 +7611,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7833,7 +7738,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw: @@ -7847,7 +7751,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -7982,7 +7885,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw: @@ -7996,7 +7898,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8131,7 +8032,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; 
GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw: @@ -8145,7 +8045,6 @@ ; GFX11-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in) { entry: @@ -8250,7 +8149,6 @@ ; GFX11-WGP-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-WGP-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg: @@ -8260,7 +8158,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8382,7 +8279,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8493,7 +8389,6 @@ ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg: @@ -8503,7 +8398,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: 
v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8631,7 +8525,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8759,7 +8652,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -8881,7 +8773,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9003,7 +8894,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9131,7 +9021,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9259,7 +9148,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 
v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9387,7 +9275,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9515,7 +9402,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9643,7 +9529,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9771,7 +9656,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -9899,7 +9783,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10027,7 +9910,6 @@ ; GFX11-CU-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-CU-NEXT: 
v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10158,7 +10040,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg: @@ -10170,7 +10051,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10309,7 +10189,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg: @@ -10321,7 +10200,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10460,7 +10338,6 @@ ; GFX11-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg: @@ -10472,7 +10349,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: 
flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10617,7 +10493,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -10629,7 +10504,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10774,7 +10648,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -10786,7 +10659,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -10925,7 +10797,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg: @@ -10937,7 +10808,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11076,7 +10946,6 @@ ; GFX11-WGP-NEXT: 
buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg: @@ -11088,7 +10957,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11233,7 +11101,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg: @@ -11245,7 +11112,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11390,7 +11256,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: @@ -11402,7 +11267,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11547,7 +11411,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; 
GFX11-CU-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: @@ -11559,7 +11422,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11704,7 +11566,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: @@ -11716,7 +11577,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -11861,7 +11721,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: @@ -11873,7 +11732,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12018,7 +11876,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg: @@ -12030,7 +11887,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12175,7 +12031,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: @@ -12187,7 +12042,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: @@ -12332,7 +12186,6 @@ ; GFX11-WGP-NEXT: buffer_gl0_inv ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: @@ -12344,7 +12197,6 @@ ; GFX11-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 glc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll @@ -515,7 +515,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-WGP-NEXT: scratch_store_b32 off, v0, s0 glc slc dlc -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: private_nontemporal_store_0: @@ -528,7 +527,6 @@ ; GFX11-CU-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: scratch_store_b32 off, v0, s0 glc slc dlc -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: @@ -692,7 +690,6 @@ ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-WGP-NEXT: scratch_store_b32 v0, v1, off glc slc dlc -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: private_nontemporal_store_1: @@ -706,7 +703,6 @@ ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: scratch_store_b32 v0, v1, off glc slc dlc -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll @@ -359,7 +359,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-WGP-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: private_volatile_store_0: @@ -373,7 +372,6 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: @@ -489,7 +487,6 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX11-WGP-NEXT: scratch_store_b32 v0, v1, off dlc ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-WGP-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-WGP-NEXT: s_endpgm ; ; GFX11-CU-LABEL: private_volatile_store_1: @@ -504,7 +501,6 @@ ; GFX11-CU-NEXT: 
v_mov_b32_e32 v1, s1 ; GFX11-CU-NEXT: scratch_store_b32 v0, v1, off dlc ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-CU-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-CU-NEXT: s_endpgm ptr addrspace(1) %in, ptr addrspace(5) %out) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll --- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll @@ -1806,7 +1806,6 @@ ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 1 %load = load volatile i8, ptr %gep, align 1 @@ -1847,7 +1846,6 @@ ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 2047 %load = load volatile i8, ptr %gep, align 1 @@ -1888,7 +1886,6 @@ ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 4095 %load = load volatile i8, ptr %gep, align 1 @@ -1933,7 +1930,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_13bit_max: @@ -1960,7 +1956,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: 
s_endpgm %gep = getelementptr i8, ptr %p, i64 8191 %load = load volatile i8, ptr %gep, align 1 @@ -2005,7 +2000,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_11bit_max: @@ -2032,7 +2026,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -2048 %load = load volatile i8, ptr %gep, align 1 @@ -2077,7 +2070,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_12bit_max: @@ -2104,7 +2096,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -4096 %load = load volatile i8, ptr %gep, align 1 @@ -2149,7 +2140,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_13bit_max: @@ -2176,7 +2166,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, 
i64 -8192 %load = load volatile i8, ptr %gep, align 1 @@ -2217,7 +2206,6 @@ ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 4095 %load = load volatile i8, ptr %gep, align 1 @@ -2262,7 +2250,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_12bit_max: @@ -2289,7 +2276,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 8191 %load = load volatile i8, ptr %gep, align 1 @@ -2334,7 +2320,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_13bit_max: @@ -2361,7 +2346,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 16383 %load = load volatile i8, ptr %gep, align 1 @@ -2406,7 +2390,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: 
flat_inst_salu_offset_2x_neg_11bit_max: @@ -2433,7 +2416,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -4096 %load = load volatile i8, ptr %gep, align 1 @@ -2478,7 +2460,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: @@ -2505,7 +2486,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -8192 %load = load volatile i8, ptr %gep, align 1 @@ -2550,7 +2530,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: @@ -2577,7 +2556,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -16384 %load = load volatile i8, ptr %gep, align 1 @@ -2622,7 +2600,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: 
flat_inst_salu_offset_64bit_11bit_split0: @@ -2649,7 +2626,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 8589936639 %load = load volatile i8, ptr %gep, align 1 @@ -2694,7 +2670,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048 glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split1: @@ -2721,7 +2696,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 8589936640 %load = load volatile i8, ptr %gep, align 1 @@ -2766,7 +2740,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split0: @@ -2793,7 +2766,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 8589938687 %load = load volatile i8, ptr %gep, align 1 @@ -2839,7 +2811,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; 
GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split1: @@ -2866,7 +2837,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 8589938688 %load = load volatile i8, ptr %gep, align 1 @@ -2912,7 +2882,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split0: @@ -2939,7 +2908,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 8589942783 %load = load volatile i8, ptr %gep, align 1 @@ -2985,7 +2953,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split1: @@ -3012,7 +2979,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 8589942784 %load = load volatile i8, ptr %gep, align 1 @@ -3060,7 +3026,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; 
; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: @@ -3087,7 +3052,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -9223372036854773761 %load = load volatile i8, ptr %gep, align 1 @@ -3135,7 +3099,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: @@ -3162,7 +3125,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -9223372036854773760 %load = load volatile i8, ptr %gep, align 1 @@ -3210,7 +3172,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: @@ -3237,7 +3198,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -9223372036854771713 %load = load volatile i8, ptr %gep, align 1 @@ -3285,7 +3245,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: @@ -3312,7 +3271,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -9223372036854771712 %load = load volatile i8, ptr %gep, align 1 @@ -3360,7 +3318,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: @@ -3387,7 +3344,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -9223372036854767617 %load = load volatile i8, ptr %gep, align 1 @@ -3435,7 +3391,6 @@ ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-SDAG-NEXT: s_endpgm ; ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: @@ -3462,7 +3417,6 @@ ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 -; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr %p, i64 -9223372036854767616 %load = load volatile i8, ptr %gep, align 1 diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll 
b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -104,7 +104,6 @@ ; MUBUF11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039 ; MUBUF11-NEXT: scratch_store_b32 off, v0, s0 ; MUBUF11-NEXT: .LBB0_2: ; %shader_eval_surface.exit -; MUBUF11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; MUBUF11-NEXT: s_endpgm ; ; FLATSCR11-LABEL: kernel_background_evaluate: @@ -132,7 +131,6 @@ ; FLATSCR11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039 ; FLATSCR11-NEXT: scratch_store_b32 off, v0, s0 ; FLATSCR11-NEXT: .LBB0_2: ; %shader_eval_surface.exit -; FLATSCR11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; FLATSCR11-NEXT: s_endpgm entry: %sd = alloca < 1339 x i32>, align 8192, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir @@ -31,7 +31,6 @@ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT_VSCNT undef $sgpr_null, 0 @@ -70,7 +69,6 @@ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT_VSCNT undef $sgpr_null, 1 @@ -111,7 +109,6 @@ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 
0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT 112 @@ -151,7 +148,6 @@ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT_VSCNT undef $sgpr_null, 0 @@ -194,7 +190,6 @@ ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT 0