diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -413,6 +413,7 @@ class DS_VOID : DS_Pseudo { + let LGKM_CNT = 0; let mayLoad = 0; let mayStore = 0; let hasSideEffects = 1; @@ -438,6 +439,7 @@ [(set i32:$vdst, (node (DS1Addr1Offset i32:$addr, i32:$offset), i32:$data0))] > { + let LGKM_CNT = 0; let mayLoad = 0; let mayStore = 0; let isConvergent = 1; @@ -630,7 +632,7 @@ def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">; } // End SubtargetPredicate = HasDsSrc2Insts -let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in { +let Uses = [EXEC], LGKM_CNT = 0, mayLoad = 0, mayStore = 0, isConvergent = 1 in { def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, SwizzleImm>; } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll @@ -29,7 +29,7 @@ ; CHECK-LABEL: {{^}}ds_bpermute_add_shl: ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4 -; CHECK: s_waitcnt lgkmcnt +; CHECK-NOT: s_waitcnt lgkmcnt define void @ds_bpermute_add_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind { %index = add i32 %base_index, 1 %byte_index = shl i32 %index, 2 @@ -40,7 +40,7 @@ ; CHECK-LABEL: {{^}}ds_bpermute_or_shl: ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4 -; CHECK: s_waitcnt lgkmcnt +; CHECK-NOT: s_waitcnt lgkmcnt define void @ds_bpermute_or_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind { %masked = and i32 %base_index, 62 %index = or i32 %masked, 1 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir @@ -3,7 +3,7 @@ ... # CHECK-LABEL: name: waitcnt-permute{{$}} # CHECK: DS_BPERMUTE_B32 -# CHECK-NEXT: S_WAITCNT 127 +# CHECK-NOT: S_WAITCNT 127 name: waitcnt-permute liveins: diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -2216,7 +2216,6 @@ ; GFX9-W64-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-W64-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) ; GFX9-W64-NEXT: s_mov_b64 exec, s[0:1] -; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v1, v0 ; GFX9-W64-NEXT: .LBB36_2: ; %ENDIF @@ -2246,7 +2245,6 @@ ; GFX10-W32-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-W32-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) ; GFX10-W32-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v1, v0 ; GFX10-W32-NEXT: .LBB36_2: ; %ENDIF @@ -2753,7 +2751,6 @@ ; GFX9-W64-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX9-W64-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) ; GFX9-W64-NEXT: s_mov_b64 exec, s[0:1] -; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v1, v0 ; GFX9-W64-NEXT: .LBB45_2: ; %ENDIF @@ -2783,7 +2780,6 @@ ; GFX10-W32-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-W32-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) ; GFX10-W32-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v1, v0 ; GFX10-W32-NEXT: .LBB45_2: ; %ENDIF @@ -2831,7 +2827,6 @@ ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX9-W64-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) -; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v0, v0 ; GFX9-W64-NEXT: .LBB46_2: ; %ENDIF @@ -2855,7 +2850,6 @@ ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX10-W32-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2) -; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v0, v0 ; GFX10-W32-NEXT: .LBB46_2: ; %ENDIF