diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1390,7 +1390,7 @@ // instruction, update the upper-bound of the appropriate counter's // bracket and the destination operand scores. // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere. - if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) { + if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst) && Inst.mayLoadOrStore()) { if (TII->isAlwaysGDS(Inst.getOpcode()) || TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) { ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll @@ -29,7 +29,7 @@ ; CHECK-LABEL: {{^}}ds_bpermute_add_shl: ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4 -; CHECK: s_waitcnt lgkmcnt +; CHECK-NOT: s_waitcnt lgkmcnt define void @ds_bpermute_add_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind { %index = add i32 %base_index, 1 %byte_index = shl i32 %index, 2 @@ -40,7 +40,7 @@ ; CHECK-LABEL: {{^}}ds_bpermute_or_shl: ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4 -; CHECK: s_waitcnt lgkmcnt +; CHECK-NOT: s_waitcnt lgkmcnt define void @ds_bpermute_or_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind { %masked = and i32 %base_index, 62 %index = or i32 %masked, 1 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir @@ -3,7 +3,7 @@ ... # CHECK-LABEL: name: waitcnt-permute{{$}} # CHECK: DS_BPERMUTE_B32 -# CHECK-NEXT: S_WAITCNT 127 +# CHECK-NOT: S_WAITCNT 127 name: waitcnt-permute liveins: diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -2216,7 +2216,6 @@ ; GFX9-W64-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-W64-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) ; GFX9-W64-NEXT: s_mov_b64 exec, s[0:1] -; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v1, v0 ; GFX9-W64-NEXT: .LBB36_2: ; %ENDIF @@ -2246,7 +2245,6 @@ ; GFX10-W32-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-W32-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) ; GFX10-W32-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v1, v0 ; GFX10-W32-NEXT: .LBB36_2: ; %ENDIF @@ -2753,7 +2751,6 @@ ; GFX9-W64-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-W64-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) ; GFX9-W64-NEXT: s_mov_b64 exec, s[0:1] -; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v1, v0 ; GFX9-W64-NEXT: .LBB45_2: ; %ENDIF @@ -2783,7 +2780,6 @@ ; GFX10-W32-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-W32-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) ; GFX10-W32-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v1, v0 ; GFX10-W32-NEXT: .LBB45_2: ; %ENDIF @@ -2831,7 +2827,6 @@ ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX9-W64-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) -; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v0, v0 ; GFX9-W64-NEXT: .LBB46_2: ; %ENDIF @@ -2855,7 +2850,6 @@ ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX10-W32-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) -; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v0, v0 ; GFX10-W32-NEXT: .LBB46_2: ; %ENDIF